Skip to main content

solo_api/
http.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! HTTP/JSON transport for Solo. Local-only by default — binds to
4//! `127.0.0.1:<port>` and serves the same operations the MCP server
5//! exposes:
6//!
7//! Episode operations:
8//!   - `POST /memory`                — remember (body: { content, source_type?, source_id? })
9//!   - `POST /memory/search`         — recall  (body: { query, limit? })
10//!   - `POST /memory/context`        — recall + themes + facts + contradictions bundle
11//!   - `GET  /memory/{id}`           — inspect
12//!   - `PATCH /memory/{id}`           — correct/update one active memory
13//!   - `DELETE /memory/{id}?reason=…` — forget
14//!
15//! Maintenance:
16//!   - `POST /memory/consolidate`    — trigger a consolidation pass
17//!   - `POST /backup`                — encrypted online backup
18//!
19//! Derived-layer (v0.4.0+; queries against the Steward's outputs):
20//!   - `GET  /memory/themes?window_days=N&limit=K`
21//!   - `GET  /memory/facts_about?subject=X&predicate=Y&since_ms=N&until_ms=N&include_as_object=B&limit=K`
22//!   - `GET  /memory/entities?query=X&limit=K`
23//!   - `GET  /memory/contradictions?limit=K`
24//!   - `POST /memory/contradictions/resolve`
25//!   - `GET  /memory/clusters/{cluster_id}?full_content=true` (v0.5.0+)
26//!
27//! Document operations (v0.7.0+):
28//!   - `POST   /memory/documents`               — ingest a file
29//!   - `POST   /memory/documents/search`        — vector search over chunks
30//!   - `GET    /memory/documents`               — paginate documents
31//!   - `GET    /memory/documents/{id}`          — inspect one document
32//!   - `DELETE /memory/documents/{id}`          — soft-delete a document
33//!
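//! Auth is optional at this layer. With no auth configured the threat
//! model is local-machine single-user; binding to `127.0.0.1` keeps the
//! surface off the LAN. Bearer-token and OIDC auth can be enabled via
//! `router_with_auth_config` / `serve_http_with_auth_config`;
//! `GET /health` and `GET /openapi.json` stay unauthenticated either way.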
37//!
38//! ## Lifecycle
39//!
//! `serve_http(addr, state, bearer_token, shutdown)` binds to `addr`, runs
//! axum with `with_graceful_shutdown(shutdown)`, and returns when shutdown
//! fires or the listener errors. `solo http-serve` invokes this from inside a
//! `OneShotContext`, so writer + reader pool + lockfile stay live for
//! the server's lifetime and clean up properly afterwards.
45
46use std::convert::Infallible;
47use std::net::SocketAddr;
48use std::str::FromStr;
49use std::sync::Arc;
50use std::time::Duration;
51
52use axum::extract::{FromRequestParts, Path, Query, State};
53use axum::http::request::Parts;
54use axum::http::{HeaderValue, Method, StatusCode};
55use axum::response::sse::{Event, KeepAlive, Sse};
56use axum::response::{IntoResponse, Response};
57use axum::routing::{get, post};
58use axum::{Json, Router};
59use futures::Stream;
60use serde::{Deserialize, Serialize};
61use solo_core::{
62    Confidence, DocumentId, EncodingContext, Episode, InvalidateEvent, MemoryId, TenantId, Tier,
63};
64use solo_storage::{TenantHandle, TenantRegistry};
65use tokio::sync::broadcast;
66use tower_http::cors::{AllowOrigin, CorsLayer};
67use tower_http::trace::TraceLayer;
68
69use crate::auth::{AuthConfig, AuthenticatedPrincipal, middleware::AuthValidator};
70
/// HTTP-side application state. v0.8.0 P2 swapped per-handler `WriteHandle
/// + ReaderPool + ...` for a `TenantRegistry` that resolves tenant on each
/// request via the `X-Solo-Tenant` header (default tenant if absent).
///
/// The struct derives `Clone`; the router builder clones it when attaching
/// state to sub-routers, so every field is either cheap to clone or wrapped
/// in an `Arc`.
#[derive(Clone)]
pub struct SoloHttpState {
    /// Multi-tenant registry. Lazy-loads tenants on first request.
    pub registry: Arc<TenantRegistry>,
    /// Default tenant used when the `X-Solo-Tenant` header is absent.
    /// Typically `TenantId::default_tenant()`. Also handed to the auth
    /// validator when an auth config is installed.
    pub default_tenant: TenantId,
    /// Read-path aliases for the canonical `"user"` subject. Sourced
    /// from `solo.config.toml` `[identity] user_aliases`; threaded
    /// through to `solo_query::facts_about` so a query for `"alex"`
    /// also surfaces rows historically extracted as `"user"`. Empty
    /// vec = behave as today. Wrapped in `Arc` so handler `clone()`s
    /// stay cheap. v0.5.0 Priority 1 sub-step 1C.
    pub user_aliases: Arc<Vec<String>>,
    /// v0.11.0 P1: MCP `Mcp-Session-Id` session store. In-memory,
    /// TTL-bounded (30 min inactivity / 4 hr absolute). The middleware
    /// on the `/mcp` route validates request headers against this
    /// store; the POST handler creates new entries on the first
    /// request without a session id. See
    /// `crates/solo-api/src/mcp_session.rs` +
    /// `docs/dev-log/0132-v0.11.0-implementation-plan.md` §3 Decision A.
    pub mcp_sessions: crate::mcp_session::SessionStore,
}
97
/// HTTP header that routes a request to a specific tenant. Optional;
/// absent → state.default_tenant. Ignored by `TenantExtractor` when the
/// request carries an authenticated principal with a tenant claim.
pub const TENANT_HEADER: &str = "x-solo-tenant";
101
102/// Axum extractor that resolves the request's target tenant, then
103/// lazy-opens the tenant via the registry.
104///
105/// Resolution order (v0.8.0 P3):
106///   1. `AuthenticatedPrincipal.tenant_claim` from request extensions —
107///      set by the auth middleware. In OIDC mode this is the validated
108///      value of the configured custom claim (default `solo_tenant`);
109///      in bearer mode this is the daemon's default tenant.
110///   2. `X-Solo-Tenant` header — falls back to this when no
111///      authenticated principal is on the request (unauthenticated
112///      loopback deployments — the default).
113///   3. `state.default_tenant` when neither is present.
114///
115/// Bad header values → 400. Lazy-open failures → 500 unless the failure
116/// kind is `NotFound` (unknown tenant id) → 404.
117pub struct TenantExtractor(pub Arc<TenantHandle>);
118
119impl<S> FromRequestParts<S> for TenantExtractor
120where
121    SoloHttpState: FromRef<S>,
122    S: Send + Sync,
123{
124    type Rejection = ApiError;
125
126    async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
127        let state = SoloHttpState::from_ref(state);
128        // Order: (1) principal.tenant_claim (set by auth middleware),
129        // (2) X-Solo-Tenant header, (3) state.default_tenant.
130        //
131        // The principal wins because in OIDC mode the JWT is the source
132        // of truth — letting the header override an OIDC claim would
133        // be a tenant-impersonation hole.
134        let resolved = if let Some(principal) = parts.extensions.get::<AuthenticatedPrincipal>()
135            && let Some(claim) = principal.tenant_claim.clone()
136        {
137            claim
138        } else {
139            match parts.headers.get(TENANT_HEADER) {
140                None => state.default_tenant.clone(),
141                Some(raw) => {
142                    let s = raw.to_str().map_err(|e| {
143                        ApiError::bad_request(format!(
144                            "{TENANT_HEADER}: header value must be ASCII ({e})"
145                        ))
146                    })?;
147                    TenantId::new(s.to_string()).map_err(|e| {
148                        ApiError::bad_request(format!("{TENANT_HEADER}: invalid tenant id: {e}"))
149                    })?
150                }
151            }
152        };
153        let handle = state.registry.get_or_open(&resolved).await.map_err(|e| {
154            // Map NotFound → 404; everything else → 500.
155            use solo_core::Error;
156            match &e {
157                Error::NotFound(_) => ApiError::not_found(e.to_string()),
158                Error::InvalidInput(_) => ApiError::bad_request(e.to_string()),
159                _ => ApiError::internal(e.to_string()),
160            }
161        })?;
162        Ok(TenantExtractor(handle))
163    }
164}
165
166use axum::extract::FromRef;
167
168/// v0.8.0 P4: extractor that pulls the authenticated principal's
169/// `subject` (JWT `sub` or `"bearer"`) out of request extensions for the
170/// audit log. `None` when no `AuthenticatedPrincipal` is present
171/// (unauthenticated loopback deployments).
172pub struct AuditPrincipal(pub Option<String>);
173
174impl<S> FromRequestParts<S> for AuditPrincipal
175where
176    S: Send + Sync,
177{
178    type Rejection = std::convert::Infallible;
179
180    async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result<Self, Self::Rejection> {
181        Ok(AuditPrincipal(
182            parts
183                .extensions
184                .get::<AuthenticatedPrincipal>()
185                .map(|p| p.subject.clone()),
186        ))
187    }
188}
189
190/// v0.10.0: extractor that lifts the full `AuthenticatedPrincipal` out
191/// of request extensions for the `/v1/tenants` handler. Distinct from
192/// `AuditPrincipal` (which only carries `subject: Option<String>`) — the
193/// tenant-list handler needs the `tenant_claim` and `claims` fields to
194/// distinguish bearer (claims = Null) from OIDC (claims = JWT object)
195/// principals.
196///
197/// `None` when no `AuthenticatedPrincipal` is on the request — the
198/// unauthenticated loopback deployment path, which the tenant-list
199/// handler treats as "all tenants visible" (same scope as the
200/// `solo tenants list` CLI). See `docs/dev-log/0119-tenants-list-impl.md`
201/// for the three-case visibility rule.
202pub struct MaybePrincipal(pub Option<AuthenticatedPrincipal>);
203
204impl<S> FromRequestParts<S> for MaybePrincipal
205where
206    S: Send + Sync,
207{
208    type Rejection = std::convert::Infallible;
209
210    async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result<Self, Self::Rejection> {
211        Ok(MaybePrincipal(
212            parts.extensions.get::<AuthenticatedPrincipal>().cloned(),
213        ))
214    }
215}
216
217/// Build the router with optional bearer-token auth (v0.7.x legacy shape).
218///
219/// When `bearer_token` is `Some(t)`, every request except `GET /health`
220/// + `GET /openapi.json` (unauthenticated probes / machine-readable spec)
221/// requires `Authorization: Bearer t`. v0.8.0 P3 routes this through the
222/// new `AuthValidator::Bearer` middleware so an `AuthenticatedPrincipal`
223/// is attached to every authenticated request (the `TenantExtractor`
224/// reads `principal.tenant_claim` ahead of the `X-Solo-Tenant` header).
225pub fn router_with_auth(state: SoloHttpState, bearer_token: Option<String>) -> Router {
226    let auth = bearer_token.map(|token| AuthConfig::Bearer { token });
227    router_with_auth_config(state, auth)
228}
229
230/// Build the router with a config-driven auth block (v0.8.0 P3+).
231///
232/// `auth = Some(AuthConfig::Bearer { token })` is equivalent to passing
233/// `Some(token)` to [`router_with_auth`]. `auth = Some(AuthConfig::Oidc { … })`
234/// installs the OIDC middleware (JWKS fetch + cache + sig + claim checks).
235/// `auth = None` runs unauthenticated — same `127.0.0.1` default as v0.7.x.
236///
237/// Public routes (`/health`, `/openapi.json`) are always exempt from
238/// auth — load balancers, uptime monitors, and codegen tools shouldn't
239/// need credentials.
240pub fn router_with_auth_config(state: SoloHttpState, auth: Option<AuthConfig>) -> Router {
241    let cors = build_cors_layer();
242    // Public, always-unauthenticated routes:
243    //   - GET /health: liveness probe (load balancers, uptime monitors).
244    //   - GET /openapi.json: machine-readable API description for client
245    //     codegen + browser-UI tooling (TypeScript / OpenAPI Generator,
246    //     curl-tools, etc.). The spec describes the API shape, not
247    //     secrets — fine to serve unauthenticated even on a LAN-bound
248    //     instance.
249    let public = Router::new()
250        .route("/health", get(|| async { "ok" }))
251        .route("/openapi.json", get(openapi_handler));
252
253    let authed = Router::new()
254        .route("/memory", post(remember_handler))
255        .route("/memory/search", post(recall_handler))
256        .route("/memory/context", post(memory_context_handler))
257        .route("/memory/consolidate", post(consolidate_handler))
258        .route(
259            "/memory/{id}",
260            get(inspect_handler)
261                .patch(update_handler)
262                .delete(forget_handler),
263        )
264        .route("/backup", post(backup_handler))
265        // Path 1 derived-layer endpoints (v0.4.0+). GET-shaped because
266        // these are pure read-only queries; query-string params for
267        // simple filters keep them curl-friendly without a JSON body.
268        .route("/memory/themes", get(themes_handler))
269        .route("/memory/facts_about", get(facts_about_handler))
270        .route("/memory/entities", get(entities_handler))
271        .route("/memory/contradictions", get(contradictions_handler))
272        .route(
273            "/memory/contradictions/resolve",
274            post(contradiction_resolve_handler),
275        )
276        // v0.5.0 Priority 3: drill into one cluster + abstraction +
277        // episodes. Two-segment path (`/memory/clusters/{id}`) so it
278        // does not shadow the single-segment `/memory/{id}` UUID
279        // inspect route.
280        .route(
281            "/memory/clusters/{cluster_id}",
282            get(inspect_cluster_handler),
283        )
284        // v0.7.0 P6: document operations. Two-segment paths
285        // (`/memory/documents/...`) so they don't shadow the
286        // single-segment `/memory/{id}` episode-inspect route. Order
287        // matters: register the literal `/memory/documents/search`
288        // ahead of `/memory/documents/{id}` so axum's matcher prefers
289        // the literal over the path parameter.
290        .route("/memory/documents/search", post(search_docs_handler))
291        .route(
292            "/memory/documents",
293            post(ingest_document_handler).get(list_documents_handler),
294        )
295        .route(
296            "/memory/documents/{id}",
297            get(inspect_document_handler).delete(forget_document_handler),
298        )
299        // v0.9.x: graph drill-down for solo-web. Read-only neighbor
300        // expansion off any node in the memory graph. See
301        // `docs/dev-log/0105-solo-web-scoping.md` §4 + the impl dev log
302        // for the full `/v1/graph/*` family this is the first of.
303        .route("/v1/graph/expand", get(graph_expand_handler))
304        // v0.10.0: paginated catalog reads for solo-web's initial graph
305        // render. See `docs/dev-log/0114-graph-nodes-edges-impl.md`
306        // alongside the same scoping doc.
307        .route("/v1/graph/nodes", get(graph_nodes_handler))
308        .route("/v1/graph/edges", get(graph_edges_handler))
309        // v0.10.0: kind-discriminated full-record drill for solo-web's
310        // inspector panel. See `docs/dev-log/0115-graph-inspect-impl.md`.
311        .route("/v1/graph/inspect/{id}", get(graph_inspect_handler))
312        // v0.10.0: unified explicit + HNSW-semantic neighbors for solo-
313        // web's "show similar" overlay. See
314        // `docs/dev-log/0116-graph-neighbors-impl.md`.
315        .route("/v1/graph/neighbors/{id}", get(graph_neighbors_handler))
316        // v0.10.0: Server-Sent Events stream of graph-data invalidations
317        // for solo-web's live update story. The wire format is
318        // INVALIDATION-shaped (`{reason, tenant_id, ts_ms, kind}`) per
319        // scoping doc §3 Decision C — clients refetch the affected page
320        // on each event rather than receiving row payloads. See
321        // `docs/dev-log/0117-graph-stream-impl.md`.
322        .route("/v1/graph/stream", get(graph_stream_handler))
323        // Authenticated readiness/status surface for local UIs and
324        // agent bridges. `/health` stays public and tiny; this route
325        // resolves the tenant and reports operator-facing JSON.
326        .route("/v1/status", get(status_handler))
327        // v0.10.0: principal-scoped tenant list for solo-web's top-bar
328        // tenant picker. Read-only — admin CRUD (create/delete) remains
329        // CLI-only per ADR-0004 §"Admin operations". The visibility
330        // filter is principal-driven: no-auth + bearer principals see
331        // every active tenant; OIDC principals see only the tenant
332        // named by their `tenant_claim`. See
333        // `docs/dev-log/0119-tenants-list-impl.md` + scoping doc §3
334        // Decision F + §4 Route 6.
335        .route("/v1/tenants", get(tenants_list_handler))
336        .with_state(state.clone());
337
338    // v0.10.2: MCP-over-HTTP transport on /mcp. Lets one Solo process
339    // serve both `/v1/graph/*` (REST, for solo-web) and `/mcp`
340    // (JSON-RPC, for solo-jarvis) without the
341    // single-writer-per-data-dir lock dance. See
342    // `docs/dev-log/0129-v0.10.2-mcp-over-http-impl.md` for the spec.
343    // POST + GET share the same path; axum's `MethodRouter` muxes by
344    // HTTP method. OPTIONS is handled by the `CorsLayer` (already
345    // wired below) — we don't need an explicit handler.
346    //
347    // v0.11.0 P1: the route gets its own session middleware layer
348    // (`mcp_session_middleware`) that validates the `Mcp-Session-Id`
349    // request header against the per-process `SessionStore`. Expired
350    // / unknown sessions return 404 with a re-init instruction; the
351    // POST handler creates a new session on a request that arrived
352    // without the header and echoes the assigned id back via
353    // `Mcp-Session-Id` response header. The middleware lives on this
354    // sub-router (not the outer `authed`) so the rest of the API
355    // surface is unaffected — only `/mcp` carries session semantics.
356    let mcp_router: Router<SoloHttpState> = Router::new()
357        .route(
358            "/mcp",
359            post(mcp_http_post_handler).get(mcp_http_get_handler),
360        )
361        .layer(axum::middleware::from_fn_with_state(
362            state.mcp_sessions.clone(),
363            crate::mcp_session::mcp_session_middleware,
364        ));
365    let authed = authed.merge(mcp_router.with_state(state.clone()));
366
367    let authed = if let Some(cfg) = auth {
368        // v0.8.0 P3: dispatch via AuthValidator (bearer | OIDC), inserts
369        // AuthenticatedPrincipal into request extensions for the
370        // TenantExtractor + audit-log to read.
371        let validator = Arc::new(AuthValidator::from_config(
372            &cfg,
373            state.default_tenant.clone(),
374        ));
375        authed.layer(axum::middleware::from_fn_with_state(
376            validator,
377            crate::auth::middleware::auth_middleware,
378        ))
379    } else {
380        authed
381    };
382
383    public
384        .merge(authed)
385        .layer(cors)
386        .layer(TraceLayer::new_for_http())
387}
388
389/// Convenience wrapper: no auth (loopback-only deployments).
390pub fn router(state: SoloHttpState) -> Router {
391    router_with_auth_config(state, None)
392}
393
394fn build_cors_layer() -> CorsLayer {
395    // Permissive-localhost CORS: allow any localhost / 127.0.0.1 origin so
396    // browser-based UIs running on a different local port can call the API
397    // without preflight friction. We do NOT use `Any` because that would
398    // allow arbitrary remote origins to talk to our localhost server via
399    // a victim's browser. With bearer-token auth enabled the practical
400    // impact is reduced (the cross-origin attacker still can't supply
401    // the token), but principle of least privilege says refuse anyway.
402    //
403    // When the server is bound to a non-loopback address (auth required),
404    // the same CORS predicate keeps localhost-only browser clients —
405    // suitable for trusted-LAN deployments where the LAN client itself
406    // tunnels through ssh/wireguard back to localhost. Wider CORS for
407    // genuine cross-origin browser use is a future config knob.
408    CorsLayer::new()
409        .allow_origin(AllowOrigin::predicate(|origin: &HeaderValue, _req| {
410            origin.to_str().map(is_localhost_origin).unwrap_or(false)
411        }))
412        .allow_methods([
413            Method::GET,
414            Method::POST,
415            Method::PATCH,
416            Method::DELETE,
417            Method::OPTIONS,
418        ])
419        .allow_headers([
420            axum::http::header::CONTENT_TYPE,
421            axum::http::header::AUTHORIZATION,
422            // Custom Solo headers — browsers preflight-check these and
423            // refuse the actual request if they're not in the allow list.
424            // Without `x-solo-tenant` solo-web's browser fetches all fail
425            // with "Failed to fetch" (CORS preflight rejection).
426            axum::http::HeaderName::from_static("x-solo-tenant"),
427            // v0.10.2: `Mcp-Session-Id` is part of the MCP Streamable
428            // HTTP transport spec (sessions, resumable streams). v0.11.0
429            // P1/P2 implement the real session affinity + resumable GET
430            // stream behind this header; the allow-list entry was
431            // pre-wired in v0.10.2 so browser-based MCP clients that
432            // preflight for it (per the spec) succeed instead of
433            // failing with a CORS error before the first request even
434            // lands.
435            axum::http::HeaderName::from_static("mcp-session-id"),
436            // v0.11.0 P2: `Last-Event-ID` is the SSE-spec header carrying
437            // the client's last-seen event id on reconnect. The
438            // resumable `GET /mcp` handler reads it and replays the
439            // missed events from the per-session ring buffer
440            // (Decision E). Browsers preflight any non-CORS-safelisted
441            // request header; without this entry the preflight fails
442            // before the actual reconnect lands.
443            axum::http::HeaderName::from_static(crate::mcp_session::MCP_LAST_EVENT_ID_HEADER),
444        ])
445}
446
/// True if `origin` is `http(s)://localhost[:port]` or
/// `http(s)://127.0.0.1[:port]` or `http(s)://[::1][:port]` (loopback IPv6).
/// Anything else (incl. nip.io tricks like `127.0.0.1.nip.io`) is rejected.
///
/// The check is strict about what may follow the host:
///   - a port, when present, must be a non-empty run of ASCII digits, so
///     userinfo-style strings such as `localhost:pw@evil.com` are refused;
///   - a bracketed IPv6 host must be followed by nothing or by `:port`, so
///     `[::1]evil.com` is refused.
///
/// A trailing path (which a well-formed `Origin` header never carries) is
/// ignored rather than rejected.
fn is_localhost_origin(origin: &str) -> bool {
    let rest = match origin
        .strip_prefix("http://")
        .or_else(|| origin.strip_prefix("https://"))
    {
        Some(rest) => rest,
        None => return false,
    };
    // Strip path (shouldn't appear on Origin headers but defend anyway).
    let authority = rest.split('/').next().unwrap_or(rest);

    // Split into host and optional port text.
    let (host, port) = if authority.starts_with('[') {
        // Bracketed IPv6 literal: the host runs through the matching ']'.
        let close = match authority.find(']') {
            Some(idx) => idx,
            None => return false,
        };
        // ']' is ASCII, so `close + 1` is always a char boundary.
        let (host, tail) = authority.split_at(close + 1);
        if tail.is_empty() {
            (host, None)
        } else {
            match tail.strip_prefix(':') {
                Some(port) => (host, Some(port)),
                // Anything other than ":port" after the bracket is garbage.
                None => return false,
            }
        }
    } else {
        match authority.rsplit_once(':') {
            Some((host, port)) => (host, Some(port)),
            None => (authority, None),
        }
    };

    let port_ok = match port {
        None => true,
        Some(digits) => !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()),
    };
    port_ok && matches!(host, "localhost" | "127.0.0.1" | "[::1]")
}
474
475/// Bind + serve (v0.7.x legacy shape). `shutdown` is awaited inside
476/// axum's `with_graceful_shutdown`; resolving it triggers a clean drain.
477/// `bearer_token = None` runs unauthenticated (loopback default);
478/// `Some(t)` requires `Authorization: Bearer t` on every request
479/// except `GET /health` + `GET /openapi.json`.
480pub async fn serve_http(
481    addr: SocketAddr,
482    state: SoloHttpState,
483    bearer_token: Option<String>,
484    shutdown: impl std::future::Future<Output = ()> + Send + 'static,
485) -> std::io::Result<()> {
486    let auth = bearer_token.map(|token| AuthConfig::Bearer { token });
487    serve_http_with_auth_config(addr, state, auth, shutdown).await
488}
489
490/// Bind + serve with a config-driven auth block (v0.8.0 P3+).
491/// `auth = None` runs unauthenticated. See [`router_with_auth_config`]
492/// for the auth-mode semantics.
493pub async fn serve_http_with_auth_config(
494    addr: SocketAddr,
495    state: SoloHttpState,
496    auth: Option<AuthConfig>,
497    shutdown: impl std::future::Future<Output = ()> + Send + 'static,
498) -> std::io::Result<()> {
499    let auth_kind = match &auth {
500        Some(AuthConfig::Bearer { .. }) => "bearer",
501        Some(AuthConfig::Oidc { .. }) => "oidc",
502        None => "none",
503    };
504    let app = router_with_auth_config(state, auth);
505    let listener = tokio::net::TcpListener::bind(addr).await?;
506    tracing::info!(%addr, auth = auth_kind, "solo http: listening");
507    axum::serve(listener, app)
508        .with_graceful_shutdown(shutdown)
509        .await
510}
511
512// ---------------------------------------------------------------------------
513// OpenAPI 3.1 spec
514// ---------------------------------------------------------------------------
515
516/// Serve the hand-crafted OpenAPI 3.1 spec at `GET /openapi.json`.
517///
518/// We keep the spec hand-written (rather than deriving via `utoipa`)
519/// for v0.1: 4 simple endpoints, types live across crate boundaries
520/// (`solo_query::RecallResult`, `solo_query::EpisodeRecord`), and a
521/// `utoipa` retrofit would touch every crate. Hand-crafted is one
522/// JSON literal in this file; a smoke test in `handler_tests` parses
523/// the response and asserts the expected paths + components are
524/// present, so drift between spec and code is caught at PR time.
525async fn openapi_handler() -> Json<serde_json::Value> {
526    Json(openapi_spec())
527}
528
529/// Build the OpenAPI 3.1 spec describing Solo's HTTP transport.
530/// Public so the smoke test + future client-codegen tooling can
531/// produce the same document without spinning up the server.
532pub fn openapi_spec() -> serde_json::Value {
533    serde_json::json!({
534        "openapi": "3.1.0",
535        "info": {
536            "title": "Solo HTTP API",
537            "description":
538                "Local-first personal memory daemon. The HTTP transport \
539                 mirrors the MCP memory tools. Default deployment is loopback-only \
540                 (127.0.0.1); LAN-bound deployments require a bearer \
541                 token via `solo http-serve --bind <ip> --bearer-token-file <path>`.",
542            "version": env!("CARGO_PKG_VERSION"),
543            "license": { "name": "Apache-2.0" }
544        },
545        "servers": [
546            { "url": "http://127.0.0.1:7437", "description": "Default loopback (replace port with your --http-port)" }
547        ],
548        "components": {
549            "securitySchemes": {
550                "bearerAuth": {
551                    "type": "http",
552                    "scheme": "bearer",
553                    "description":
554                        "Bearer-token auth. Required only on LAN-bound deployments \
555                         (`solo http-serve --bind <non-loopback> --bearer-token-file <path>`); \
556                         the default `127.0.0.1` deployment is unauthenticated. \
557                         `GET /health` and `GET /openapi.json` are exempt from auth even \
558                         on bearer-protected instances."
559                }
560            },
561            "schemas": {
562                "RememberRequest": {
563                    "type": "object",
564                    "required": ["content"],
565                    "properties": {
566                        "content": { "type": "string", "minLength": 1, "description": "Episode content to embed + store." },
567                        "source_type": { "type": "string", "description": "Free-form source tag (e.g. `user_message`, `tool_output`). Defaults to `user_message`." },
568                        "source_id": { "type": "string", "description": "Optional upstream ID for traceability." }
569                    },
570                    "additionalProperties": false
571                },
572                "RememberResponse": {
573                    "type": "object",
574                    "required": ["memory_id"],
575                    "properties": {
576                        "memory_id": { "type": "string", "format": "uuid", "description": "UUID v7 assigned to the new episode." }
577                    }
578                },
579                "RecallRequest": {
580                    "type": "object",
581                    "required": ["query"],
582                    "properties": {
583                        "query": { "type": "string", "minLength": 1, "description": "Natural-language query; embedded by the same model as stored episodes." },
584                        "limit": { "type": "integer", "minimum": 1, "maximum": 50, "default": 5, "description": "Max number of hits to return." }
585                    },
586                    "additionalProperties": false
587                },
588                "RecallResult": {
589                    "type": "object",
590                    "description":
591                        "Recall response. Fields are stable across v0.1 but not exhaustively documented here — \
592                         see `solo_query::RecallResult` in the source for the canonical shape. \
593                         Treat as a forward-compatible JSON object.",
594                    "additionalProperties": true
595                },
596                "MemoryContextRequest": {
597                    "type": "object",
598                    "required": ["query"],
599                    "properties": {
600                        "query": { "type": "string", "minLength": 1, "description": "Natural-language query for episodic recall." },
601                        "subject": { "type": "string", "description": "Optional subject for structured facts; when present, facts also match object-position references." },
602                        "window_days": { "type": "integer", "minimum": 1, "description": "Optional recency window for themes." },
603                        "limit": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5, "description": "Per-section result limit." }
604                    },
605                    "additionalProperties": false
606                },
607                "MemoryContextResult": {
608                    "type": "object",
609                    "description": "Agent-oriented memory context bundle: recall, themes, facts, and contradictions.",
610                    "additionalProperties": true
611                },
612                "MemoryUpdateRequest": {
613                    "type": "object",
614                    "required": ["content"],
615                    "properties": {
616                        "content": { "type": "string", "minLength": 1, "description": "Replacement content for the active memory." }
617                    },
618                    "additionalProperties": false
619                },
620                "MemoryUpdateResult": {
621                    "type": "object",
622                    "description": "Result of PATCH /memory/{id}. See `solo_query::MemoryUpdateResult`.",
623                    "additionalProperties": true
624                },
625                "ConsolidationScope": {
626                    "type": "object",
627                    "description": "Filter + flags for consolidation. All fields optional; empty body = unbounded defaults.",
628                    "properties": {
629                        "window_days": { "type": "integer", "nullable": true, "description": "Restrict to memories with ts_ms >= now - window_days * 86400000. Null/omitted = unbounded." },
630                        "force_merge": { "type": "boolean", "default": false, "description": "Run the existing-vs-existing merge + abstraction-regen passes even with zero unclustered candidates. Drift catch-up on quiet corpora. Added in 0.3.1." }
631                    },
632                    "additionalProperties": false
633                },
634                "ConsolidationReport": {
635                    "type": "object",
636                    "required": [
637                        "episodes_seen", "clusters_built", "clusters_merged",
638                        "clusters_absorbed", "existing_clusters_merged",
639                        "episodes_clustered", "abstractions_built",
640                        "abstractions_regenerated", "triples_built",
641                        "contradictions_found"
642                    ],
643                    "properties": {
644                        "episodes_seen":             { "type": "integer", "minimum": 0 },
645                        "clusters_built":            { "type": "integer", "minimum": 0, "description": "Brand-new clusters that survived to be persisted (post in-run-merge, post cross-run-absorb)." },
646                        "clusters_merged":           { "type": "integer", "minimum": 0, "description": "In-run merge: clusters absorbed into a sibling within this consolidate run (cross-UTC-bucket case). Counts losers." },
647                        "clusters_absorbed":         { "type": "integer", "minimum": 0, "description": "Cross-run absorb: freshly-built clusters folded into a pre-existing DB cluster with a similar centroid. Counts new-side clusters." },
648                        "existing_clusters_merged":  { "type": "integer", "minimum": 0, "description": "Existing-vs-existing merge: pre-existing DB clusters that drifted toward each other and now coalesce. Counts losers." },
649                        "episodes_clustered":        { "type": "integer", "minimum": 0 },
650                        "abstractions_built":        { "type": "integer", "minimum": 0, "description": "Fresh abstractions persisted for newly-built clusters. 0 when no LlmClient is wired." },
651                        "abstractions_regenerated":  { "type": "integer", "minimum": 0, "description": "Existing clusters whose stale abstractions were dropped and rebuilt because absorb or existing-merge changed their episode set. 0 without an LlmClient." },
652                        "triples_built":             { "type": "integer", "minimum": 0 },
653                        "contradictions_found":      { "type": "integer", "minimum": 0 }
654                    }
655                },
656                "EpisodeRecord": {
657                    "type": "object",
658                    "description":
659                        "Inspect response: full episode record. Fields are stable across v0.1 but not \
660                         exhaustively documented here — see `solo_query::EpisodeRecord` in the source. \
661                         Treat as a forward-compatible JSON object.",
662                    "additionalProperties": true
663                },
664                "ThemeHit": {
665                    "type": "object",
666                    "description":
667                        "One cluster + its (optional) abstraction. Returned by GET /memory/themes. \
668                         See `solo_query::ThemeHit` for the canonical shape: cluster_id, \
669                         abstraction_id?, abstraction_text?, episode_count, coherence, created_at_ms.",
670                    "additionalProperties": true
671                },
672                "FactHit": {
673                    "type": "object",
674                    "description":
675                        "One Steward-extracted SPO triple. Returned by GET /memory/facts_about. \
676                         See `solo_query::FactHit` for fields: triple_id, subject_id, predicate, \
677                         object_id, object_kind, valid_from_ms, valid_to_ms?, confidence, cluster_id?.",
678                    "additionalProperties": true
679                },
680                "EntityHit": {
681                    "type": "object",
682                    "description":
683                        "One discovered entity-like id from the structured-fact graph. Returned by \
684                         GET /memory/entities. See `solo_query::EntityHit`.",
685                    "additionalProperties": true
686                },
687                "ContradictionHit": {
688                    "type": "object",
689                    "description":
690                        "One Steward-flagged contradiction with each side's triple LEFT JOIN'd in. \
691                         Returned by GET /memory/contradictions. See `solo_query::ContradictionHit`: \
692                         a_id, b_id, kind, explanation, detected_at_ms, status, resolved_at_ms?, \
693                         resolution_note?, winning_triple_id?, a_triple?, b_triple?.",
694                    "additionalProperties": true
695                },
696                "ContradictionResolveRequest": {
697                    "type": "object",
698                    "required": ["a_id", "b_id", "kind"],
699                    "properties": {
700                        "a_id": { "type": "string", "minLength": 1 },
701                        "b_id": { "type": "string", "minLength": 1 },
702                        "kind": { "type": "string", "minLength": 1 },
703                        "status": {
704                            "type": "string",
705                            "enum": ["unresolved", "resolved", "reopened"],
706                            "default": "resolved"
707                        },
708                        "resolution_note": { "type": "string" },
709                        "winning_triple_id": { "type": "string" }
710                    },
711                    "additionalProperties": false
712                },
713                "ContradictionResolution": {
714                    "type": "object",
715                    "description": "Lifecycle update result for POST /memory/contradictions/resolve.",
716                    "additionalProperties": true
717                },
718                "ClusterRecord": {
719                    "type": "object",
720                    "description":
721                        "Snapshot of one cluster — its row, optional abstraction, and source episodes \
722                         (content truncated to 200 chars unless ?full_content=true). Returned by \
723                         GET /memory/clusters/{cluster_id}. See `solo_query::ClusterRecord`.",
724                    "additionalProperties": true
725                },
726                "IngestDocumentRequest": {
727                    "type": "object",
728                    "required": ["path"],
729                    "properties": {
730                        "path": {
731                            "type": "string",
732                            "minLength": 1,
733                            "description":
734                                "Server-side absolute path to the file to ingest. The file must be \
735                                 readable by the Solo process. Supported formats: plaintext / \
736                                 markdown / code, HTML, PDF."
737                        }
738                    },
739                    "additionalProperties": false
740                },
741                "IngestReport": {
742                    "type": "object",
743                    "description":
744                        "Returned by POST /memory/documents. Reports the document id assigned, \
745                         the number of chunks persisted + embedded, the total byte size, and a \
746                         `deduped` flag (true when the same content_hash was already present and \
747                         the existing doc_id was returned unchanged). See `solo_storage::IngestReport`.",
748                    "required": ["doc_id", "chunks_persisted", "bytes_ingested", "deduped"],
749                    "properties": {
750                        "doc_id":            { "type": "string", "format": "uuid" },
751                        "chunks_persisted":  { "type": "integer", "minimum": 0 },
752                        "bytes_ingested":    { "type": "integer", "minimum": 0, "format": "int64" },
753                        "deduped":           { "type": "boolean" }
754                    },
755                    "additionalProperties": false
756                },
757                "ForgetDocumentReport": {
758                    "type": "object",
759                    "description":
760                        "Returned by DELETE /memory/documents/{id}. Reports the doc_id soft-deleted \
761                         and how many chunk rowids were tombstoned in the HNSW index. The chunk rows \
762                         themselves survive in SQL for forensic value. See `solo_storage::ForgetDocumentReport`.",
763                    "required": ["doc_id", "chunks_tombstoned"],
764                    "properties": {
765                        "doc_id":             { "type": "string", "format": "uuid" },
766                        "chunks_tombstoned":  { "type": "integer", "minimum": 0 }
767                    },
768                    "additionalProperties": false
769                },
770                "SearchDocsRequest": {
771                    "type": "object",
772                    "required": ["query"],
773                    "properties": {
774                        "query": { "type": "string", "minLength": 1 },
775                        "limit": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 }
776                    },
777                    "additionalProperties": false
778                },
779                "DocSearchHit": {
780                    "type": "object",
781                    "description":
782                        "One chunk hit + parent-doc context. Fields per `solo_query::DocSearchHit`: \
783                         chunk_id, doc_id, doc_title?, doc_source?, doc_mime_type?, chunk_index, \
784                         content, cos_distance, start_offset, end_offset.",
785                    "additionalProperties": true
786                },
787                "DocumentInspectResult": {
788                    "type": "object",
789                    "description":
790                        "Returned by GET /memory/documents/{id}. A `document` record (full metadata) \
791                         plus an ordered list of chunk summaries (each preview truncated to 200 \
792                         chars). See `solo_query::DocumentInspectResult`.",
793                    "additionalProperties": true
794                },
795                "DocumentSummary": {
796                    "type": "object",
797                    "description":
798                        "One row from GET /memory/documents. Fields per `solo_query::DocumentSummary`: \
799                         doc_id, title?, source?, mime_type?, ingested_at_ms, chunk_count, status.",
800                    "additionalProperties": true
801                },
802                "GraphNode": {
803                    "type": "object",
804                    "required": ["id", "kind", "label", "tenant_id"],
805                    "properties": {
806                        "id": { "type": "string", "description": "Prefixed graph node id, e.g. ep:<uuid>, doc:<uuid>, chunk:<uuid>, cl:<id>, ent:<value>." },
807                        "kind": { "type": "string", "enum": ["episode", "document", "chunk", "cluster", "entity"] },
808                        "label": { "type": "string" },
809                        "tenant_id": { "type": "string" },
810                        "preview": { "type": ["string", "null"] },
811                        "score": { "type": ["number", "null"] },
812                        "meta": { "type": ["object", "null"], "additionalProperties": true }
813                    },
814                    "additionalProperties": true
815                },
816                "GraphEdge": {
817                    "type": "object",
818                    "required": ["id", "source", "target", "kind"],
819                    "properties": {
820                        "id": { "type": "string" },
821                        "source": { "type": "string" },
822                        "target": { "type": "string" },
823                        "kind": { "type": "string" },
824                        "label": { "type": ["string", "null"] },
825                        "weight": { "type": ["number", "null"] },
826                        "meta": { "type": ["object", "null"], "additionalProperties": true }
827                    },
828                    "additionalProperties": true
829                },
830                "GraphResponse": {
831                    "type": "object",
832                    "required": ["nodes", "edges"],
833                    "properties": {
834                        "nodes": { "type": "array", "items": { "$ref": "#/components/schemas/GraphNode" } },
835                        "edges": { "type": "array", "items": { "$ref": "#/components/schemas/GraphEdge" } }
836                    }
837                },
838                "GraphNodesResponse": {
839                    "type": "object",
840                    "required": ["nodes"],
841                    "properties": {
842                        "nodes": { "type": "array", "items": { "$ref": "#/components/schemas/GraphNode" } },
843                        "next_cursor": { "type": ["string", "null"] }
844                    }
845                },
846                "GraphEdgesResponse": {
847                    "type": "object",
848                    "required": ["edges"],
849                    "properties": {
850                        "edges": { "type": "array", "items": { "$ref": "#/components/schemas/GraphEdge" } },
851                        "next_cursor": { "type": ["string", "null"] }
852                    }
853                },
854                "GraphInspectResponse": {
855                    "type": "object",
856                    "required": ["node"],
857                    "properties": {
858                        "node": { "$ref": "#/components/schemas/GraphNode" },
859                        "record": { "type": ["object", "null"], "additionalProperties": true },
860                        "triples_in": { "type": "array", "items": { "$ref": "#/components/schemas/GraphEdge" } },
861                        "triples_out": { "type": "array", "items": { "$ref": "#/components/schemas/GraphEdge" } }
862                    },
863                    "additionalProperties": true
864                },
865                "TenantListItem": {
866                    "type": "object",
867                    "required": ["id", "display_name", "created_at_ms", "last_accessed_ms", "status", "quota_bytes", "episode_count", "size_bytes", "pct_used"],
868                    "properties": {
869                        "id": { "type": "string" },
870                        "display_name": { "type": ["string", "null"] },
871                        "created_at_ms": { "type": "integer", "format": "int64" },
872                        "last_accessed_ms": { "type": ["integer", "null"], "format": "int64" },
873                        "status": { "type": "string", "enum": ["active"] },
874                        "quota_bytes": { "type": ["integer", "null"], "minimum": 0 },
875                        "episode_count": { "type": ["integer", "null"], "minimum": 0 },
876                        "size_bytes": { "type": ["integer", "null"], "minimum": 0 },
877                        "pct_used": { "type": ["number", "null"], "minimum": 0, "maximum": 100 }
878                    }
879                },
880                "TenantsListResponse": {
881                    "type": "object",
882                    "required": ["tenants"],
883                    "properties": {
884                        "tenants": { "type": "array", "items": { "$ref": "#/components/schemas/TenantListItem" } }
885                    }
886                },
887                "StatusResponse": {
888                    "type": "object",
889                    "required": ["ok", "version", "tenant", "embedder", "active_tenants", "mcp"],
890                    "properties": {
891                        "ok": { "type": "boolean" },
892                        "version": { "type": "string" },
893                        "tenant": {
894                            "type": "object",
895                            "required": ["id", "registered", "status", "quota_bytes", "last_accessed_ms"],
896                            "properties": {
897                                "id": { "type": "string" },
898                                "registered": { "type": "boolean" },
899                                "status": { "type": ["string", "null"], "enum": ["active", null] },
900                                "quota_bytes": { "type": ["integer", "null"], "minimum": 0 },
901                                "last_accessed_ms": { "type": ["integer", "null"], "format": "int64" }
902                            }
903                        },
904                        "embedder": {
905                            "type": "object",
906                            "required": ["name", "version", "dim", "dtype"],
907                            "properties": {
908                                "name": { "type": "string" },
909                                "version": { "type": "string" },
910                                "dim": { "type": "integer", "minimum": 1 },
911                                "dtype": { "type": "string" }
912                            }
913                        },
914                        "active_tenants": { "type": "integer", "minimum": 0 },
915                        "mcp": {
916                            "type": "object",
917                            "required": ["sessions"],
918                            "properties": {
919                                "sessions": { "type": "integer", "minimum": 0 }
920                            }
921                        }
922                    }
923                },
924                "JsonRpcRequest": {
925                    "type": "object",
926                    "required": ["jsonrpc", "method"],
927                    "properties": {
928                        "jsonrpc": { "type": "string", "enum": ["2.0"] },
929                        "id": { "description": "String or number request id. Omit for notifications." },
930                        "method": { "type": "string" },
931                        "params": { "type": ["object", "array", "null"], "additionalProperties": true }
932                    },
933                    "additionalProperties": true
934                },
935                "JsonRpcResponse": {
936                    "type": "object",
937                    "required": ["jsonrpc", "id"],
938                    "properties": {
939                        "jsonrpc": { "type": "string", "enum": ["2.0"] },
940                        "id": {},
941                        "result": {},
942                        "error": {
943                            "type": "object",
944                            "required": ["code", "message"],
945                            "properties": {
946                                "code": { "type": "integer" },
947                                "message": { "type": "string" },
948                                "data": {}
949                            }
950                        }
951                    },
952                    "additionalProperties": true
953                },
954                "ApiError": {
955                    "type": "object",
956                    "required": ["error", "status"],
957                    "properties": {
958                        "error": { "type": "string" },
959                        "status": { "type": "integer", "minimum": 400, "maximum": 599 }
960                    }
961                }
962            }
963        },
964        "paths": {
965            "/health": {
966                "get": {
967                    "summary": "Liveness probe",
968                    "description": "Returns plain text `ok`. Always unauthenticated.",
969                    "responses": {
970                        "200": {
971                            "description": "Server is up.",
972                            "content": { "text/plain": { "schema": { "type": "string", "example": "ok" } } }
973                        }
974                    }
975                }
976            },
977            "/openapi.json": {
978                "get": {
979                    "summary": "Self-describing OpenAPI 3.1 spec",
980                    "description": "Returns this document. Always unauthenticated.",
981                    "responses": {
982                        "200": {
983                            "description": "OpenAPI 3.1 document.",
984                            "content": { "application/json": { "schema": { "type": "object" } } }
985                        }
986                    }
987                }
988            },
989            "/memory": {
990                "post": {
991                    "summary": "Remember (store an episode)",
992                    "description": "Equivalent to MCP tool `memory_remember`.",
993                    "security": [{ "bearerAuth": [] }, {}],
994                    "requestBody": {
995                        "required": true,
996                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RememberRequest" } } }
997                    },
998                    "responses": {
999                        "200": {
1000                            "description": "Memory stored; returns the new MemoryId.",
1001                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RememberResponse" } } }
1002                        },
1003                        "400": { "description": "Bad request (e.g. empty content).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1004                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1005                    }
1006                }
1007            },
1008            "/memory/search": {
1009                "post": {
1010                    "summary": "Recall (vector search)",
1011                    "description": "Equivalent to MCP tool `memory_recall`. Embeds the query, runs HNSW search, returns the top-K hits in cosine-distance order.",
1012                    "security": [{ "bearerAuth": [] }, {}],
1013                    "requestBody": {
1014                        "required": true,
1015                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RecallRequest" } } }
1016                    },
1017                    "responses": {
1018                        "200": {
1019                            "description": "Search results.",
1020                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RecallResult" } } }
1021                        },
1022                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1023                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1024                    }
1025                }
1026            },
1027            "/memory/context": {
1028                "post": {
1029                    "summary": "Build agent memory context",
1030                    "description": "Equivalent to MCP tool `memory_context`. Returns one bounded bundle containing episodic recall, recent themes, optional facts about a subject, and contradictions.",
1031                    "security": [{ "bearerAuth": [] }, {}],
1032                    "requestBody": {
1033                        "required": true,
1034                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryContextRequest" } } }
1035                    },
1036                    "responses": {
1037                        "200": {
1038                            "description": "Combined memory context.",
1039                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryContextResult" } } }
1040                        },
1041                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1042                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1043                    }
1044                }
1045            },
1046            "/memory/consolidate": {
1047                "post": {
1048                    "summary": "Run a consolidation pass (clustering + abstraction)",
1049                    "description":
1050                        "Idempotent. Triggers the SWS-equivalent clustering pass; if a `Steward` LLM is wired \
1051                         on the server, also runs the REM-equivalent abstraction pass that populates \
1052                         `semantic_abstractions` and `triples`. Empty request body = default scope (unbounded \
1053                         window). Equivalent to the `solo consolidate` CLI.",
1054                    "security": [{ "bearerAuth": [] }, {}],
1055                    "requestBody": {
1056                        "required": false,
1057                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConsolidationScope" } } }
1058                    },
1059                    "responses": {
1060                        "200": {
1061                            "description": "Consolidation complete; report counts the work done.",
1062                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConsolidationReport" } } }
1063                        },
1064                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1065                    }
1066                }
1067            },
1068            "/backup": {
1069                "post": {
1070                    "summary": "Online encrypted backup",
1071                    "description":
1072                        "Run an online SQLCipher backup of the live data dir to a server-side path. \
1073                         The destination file is encrypted with the same Argon2id-derived raw key as \
1074                         the source, so it restores under the same passphrase + a copy of the source's \
1075                         `solo.config.toml`. Hot — the backup runs against the writer's existing \
1076                         connection without taking the lockfile, so the daemon keeps serving reads + \
1077                         writes during the operation. v0.3.2+.",
1078                    "security": [{ "bearerAuth": [] }, {}],
1079                    "requestBody": {
1080                        "required": true,
1081                        "content": { "application/json": { "schema": {
1082                            "type": "object",
1083                            "properties": {
1084                                "to": { "type": "string", "description": "Server-side absolute path for the backup file." },
1085                                "force": { "type": "boolean", "description": "Overwrite an existing destination file. Default false.", "default": false }
1086                            },
1087                            "required": ["to"]
1088                        } } }
1089                    },
1090                    "responses": {
1091                        "200": {
1092                            "description": "Backup complete; reports the destination path + elapsed milliseconds.",
1093                            "content": { "application/json": { "schema": {
1094                                "type": "object",
1095                                "properties": {
1096                                    "path": { "type": "string" },
1097                                    "elapsed_ms": { "type": "integer", "format": "int64" }
1098                                }
1099                            } } }
1100                        },
1101                        "400": { "description": "Destination invalid, exists without force, or its parent doesn't exist." },
1102                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1103                        "500": { "description": "Backup failed (disk full, permission denied, etc.)." }
1104                    }
1105                }
1106            },
1107            "/memory/{id}": {
1108                "get": {
1109                    "summary": "Inspect a memory by ID",
1110                    "description": "Equivalent to MCP tool `memory_inspect`.",
1111                    "security": [{ "bearerAuth": [] }, {}],
1112                    "parameters": [{
1113                        "name": "id",
1114                        "in": "path",
1115                        "required": true,
1116                        "schema": { "type": "string", "format": "uuid" },
1117                        "description": "MemoryId (UUID v7)."
1118                    }],
1119                    "responses": {
1120                        "200": {
1121                            "description": "Episode record.",
1122                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/EpisodeRecord" } } }
1123                        },
1124                        "400": { "description": "Malformed ID.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1125                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1126                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1127                    }
1128                },
1129                "patch": {
1130                    "summary": "Correct/update a single active memory",
1131                    "description":
1132                        "Equivalent to MCP tool `memory_update`. Rewrites the active episode content, \
1133                         refreshes its embedding, updates the pending index/HNSW entry, and records \
1134                         an audit event. Forgotten memories cannot be updated.",
1135                    "security": [{ "bearerAuth": [] }, {}],
1136                    "parameters": [
1137                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } }
1138                    ],
1139                    "requestBody": {
1140                        "required": true,
1141                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryUpdateRequest" } } }
1142                    },
1143                    "responses": {
1144                        "200": {
1145                            "description": "Updated memory metadata.",
1146                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryUpdateResult" } } }
1147                        },
1148                        "400": { "description": "Malformed ID or empty content.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1149                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1150                        "409": { "description": "Memory exists but is not active.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1151                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1152                    }
1153                },
1154                "delete": {
1155                    "summary": "Forget (soft-delete) a memory by ID",
1156                    "description":
1157                        "Equivalent to MCP tool `memory_forget`. Soft-delete: flips `episodes.status = 'forgotten'` \
1158                         and tombstones the HNSW vector. The row + embedding are preserved for forensics; \
1159                         re-running `solo reembed` after this does NOT restore visibility.",
1160                    "security": [{ "bearerAuth": [] }, {}],
1161                    "parameters": [
1162                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } },
1163                        { "name": "reason", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Free-form reason logged via tracing (not yet persisted to the DB)." }
1164                    ],
1165                    "responses": {
1166                        "204": { "description": "Forgotten (or already forgotten — idempotent)." },
1167                        "400": { "description": "Malformed ID.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1168                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1169                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1170                    }
1171                }
1172            },
1173            "/memory/themes": {
1174                "get": {
1175                    "summary": "List recent cluster themes",
1176                    "description":
1177                        "Equivalent to MCP tool `memory_themes`. List cluster abstractions ordered by \
1178                         most-recent first. Use to surface 'what has the user been thinking about lately' \
1179                         without paging through individual episodes. v0.4.0+.",
1180                    "security": [{ "bearerAuth": [] }, {}],
1181                    "parameters": [
1182                        { "name": "window_days", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1 }, "description": "Optional time window. Omit for unfiltered (all-time, most-recent first)." },
1183                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
1184                    ],
1185                    "responses": {
1186                        "200": {
1187                            "description": "Array of ThemeHits (possibly empty).",
1188                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ThemeHit" } } } }
1189                        },
1190                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1191                    }
1192                }
1193            },
1194            "/memory/facts_about": {
1195                "get": {
1196                    "summary": "Query the SPO knowledge graph by subject",
1197                    "description":
1198                        "Equivalent to MCP tool `memory_facts_about`. Query Steward-extracted triples by \
1199                         subject + optional predicate + optional time window. Subject is required \
1200                         (predicate-only scans not supported). Pass `include_as_object=true` (v0.5.1+) \
1201                         to also surface rows where `subject` appears as the object. v0.4.0+.",
1202                    "security": [{ "bearerAuth": [] }, {}],
1203                    "parameters": [
1204                        { "name": "subject", "in": "query", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Subject id to query (e.g. `Sam`)." },
1205                        { "name": "predicate", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Optional predicate filter (e.g. `works_at`)." },
1206                        { "name": "since_ms", "in": "query", "required": false, "schema": { "type": "integer" }, "description": "Optional valid_from_ms lower bound (epoch ms)." },
1207                        { "name": "until_ms", "in": "query", "required": false, "schema": { "type": "integer" }, "description": "Optional valid_to_ms upper bound (epoch ms). NULL upper bounds (still-valid facts) pass through." },
1208                        { "name": "include_as_object", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "If true, also match rows where `subject` appears as the object (e.g. surface 'Sam pushes back on PRs about Maya' under subject='Maya'). Default false. v0.5.1+." },
1209                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
1210                    ],
1211                    "responses": {
1212                        "200": {
1213                            "description": "Array of FactHits (possibly empty).",
1214                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/FactHit" } } } }
1215                        },
1216                        "400": { "description": "Bad request (e.g. empty subject).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1217                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1218                    }
1219                }
1220            },
1221            "/memory/entities": {
1222                "get": {
1223                    "summary": "Discover structured-graph entities",
1224                    "description":
1225                        "Equivalent to MCP tool `memory_entities`. Searches entity-like ids found in \
1226                         active triples and returns counts plus common predicates. Use before \
1227                         `/memory/facts_about` when the exact subject id is uncertain.",
1228                    "security": [{ "bearerAuth": [] }, {}],
1229                    "parameters": [
1230                        { "name": "query", "in": "query", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Partial or exact entity id." },
1231                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
1232                    ],
1233                    "responses": {
1234                        "200": {
1235                            "description": "Array of EntityHits (possibly empty).",
1236                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/EntityHit" } } } }
1237                        },
1238                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1239                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1240                    }
1241                }
1242            },
1243            "/memory/contradictions": {
1244                "get": {
1245                    "summary": "List Steward-flagged contradictions",
1246                    "description":
1247                        "Equivalent to MCP tool `memory_contradictions`. Each result includes both \
1248                         sides' triple SPO via LEFT JOIN for context. v0.4.0+.",
1249                    "security": [{ "bearerAuth": [] }, {}],
1250                    "parameters": [
1251                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
1252                    ],
1253                    "responses": {
1254                        "200": {
1255                            "description": "Array of ContradictionHits (possibly empty).",
1256                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ContradictionHit" } } } }
1257                        },
1258                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1259                    }
1260                }
1261            },
1262            "/memory/contradictions/resolve": {
1263                "post": {
1264                    "summary": "Resolve or reopen a contradiction",
1265                    "description":
1266                        "Equivalent to MCP tool `memory_contradiction_resolve`. Updates the lifecycle \
1267                         fields on one contradiction row after the user clarifies which memory is current.",
1268                    "security": [{ "bearerAuth": [] }, {}],
1269                    "requestBody": {
1270                        "required": true,
1271                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ContradictionResolveRequest" } } }
1272                    },
1273                    "responses": {
1274                        "200": {
1275                            "description": "Contradiction lifecycle update result.",
1276                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ContradictionResolution" } } }
1277                        },
1278                        "400": { "description": "Bad request (missing ids/kind or invalid status).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1279                        "404": { "description": "No matching contradiction.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1280                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1281                    }
1282                }
1283            },
1284            "/memory/clusters/{cluster_id}": {
1285                "get": {
1286                    "summary": "Inspect a single cluster",
1287                    "description":
1288                        "Equivalent to MCP tool `memory_inspect_cluster`. Returns the cluster row, \
1289                         its (optional) abstraction, and its source episodes. By default each \
1290                         episode's `content` is truncated to 200 chars with a trailing `…`. Pass \
1291                         `?full_content=true` to get verbatim episode content. v0.5.0+.",
1292                    "security": [{ "bearerAuth": [] }, {}],
1293                    "parameters": [
1294                        { "name": "cluster_id", "in": "path", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Cluster id (from a previous GET /memory/themes response)." },
1295                        { "name": "full_content", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "If true, return episode content verbatim. Default false (truncate to 200 chars + ellipsis)." }
1296                    ],
1297                    "responses": {
1298                        "200": {
1299                            "description": "Cluster snapshot.",
1300                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ClusterRecord" } } }
1301                        },
1302                        "400": { "description": "Bad request (e.g. empty cluster_id).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1303                        "404": { "description": "No such cluster.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1304                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1305                    }
1306                }
1307            },
1308            "/memory/documents": {
1309                "post": {
1310                    "summary": "Ingest a document",
1311                    "description":
1312                        "Equivalent to MCP tool `memory_ingest_document`. Reads the file at the \
1313                         supplied server-side path, parses + chunks + embeds, and persists under \
1314                         `documents` + `document_chunks`. Returns the new doc_id, chunk count, and \
1315                         a `deduped` flag (true when an existing document with the same content_hash \
1316                         was returned without re-embedding). v0.7.0+.",
1317                    "security": [{ "bearerAuth": [] }, {}],
1318                    "requestBody": {
1319                        "required": true,
1320                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/IngestDocumentRequest" } } }
1321                    },
1322                    "responses": {
1323                        "200": {
1324                            "description": "Document ingested (or deduplicated).",
1325                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/IngestReport" } } }
1326                        },
1327                        "400": { "description": "Bad request (e.g. empty path, file unreadable, parse error).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1328                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1329                    }
1330                },
1331                "get": {
1332                    "summary": "List ingested documents (paginated)",
1333                    "description":
1334                        "Equivalent to MCP tool `memory_list_documents`. Returns a paginated index, \
1335                         newest first. Forgotten documents are hidden by default; pass \
1336                         `?include_forgotten=true` to see them too. v0.7.0+.",
1337                    "security": [{ "bearerAuth": [] }, {}],
1338                    "parameters": [
1339                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 20 } },
1340                        { "name": "offset", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 0, "default": 0 } },
1341                        { "name": "include_forgotten", "in": "query", "required": false, "schema": { "type": "boolean", "default": false } }
1342                    ],
1343                    "responses": {
1344                        "200": {
1345                            "description": "Array of DocumentSummary (possibly empty).",
1346                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/DocumentSummary" } } } }
1347                        },
1348                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1349                    }
1350                }
1351            },
1352            "/memory/documents/search": {
1353                "post": {
1354                    "summary": "Vector search across document chunks",
1355                    "description":
1356                        "Equivalent to MCP tool `memory_search_docs`. Embeds the query and returns \
1357                         up to `limit` matching chunks, best match first, each annotated with the \
1358                         parent document's title + source path. Forgotten documents are excluded. \
1359                         v0.7.0+.",
1360                    "security": [{ "bearerAuth": [] }, {}],
1361                    "requestBody": {
1362                        "required": true,
1363                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SearchDocsRequest" } } }
1364                    },
1365                    "responses": {
1366                        "200": {
1367                            "description": "Array of DocSearchHits (possibly empty).",
1368                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/DocSearchHit" } } } }
1369                        },
1370                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1371                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1372                    }
1373                }
1374            },
1375            "/memory/documents/{id}": {
1376                "get": {
1377                    "summary": "Inspect one document",
1378                    "description":
1379                        "Equivalent to MCP tool `memory_inspect_document`. Returns the document's \
1380                         metadata plus a preview of every chunk (truncated to 200 chars). v0.7.0+.",
1381                    "security": [{ "bearerAuth": [] }, {}],
1382                    "parameters": [
1383                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "DocumentId (UUID v7)." }
1384                    ],
1385                    "responses": {
1386                        "200": {
1387                            "description": "Document inspection result.",
1388                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/DocumentInspectResult" } } }
1389                        },
1390                        "400": { "description": "Malformed id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1391                        "404": { "description": "No such document.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1392                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1393                    }
1394                },
1395                "delete": {
1396                    "summary": "Forget (soft-delete) one document",
1397                    "description":
1398                        "Equivalent to MCP tool `memory_forget_document`. Flips `documents.status` \
1399                         to `forgotten` and tombstones every chunk's HNSW rowid. The chunk rows \
1400                         survive in SQL for forensic value. v0.7.0+.",
1401                    "security": [{ "bearerAuth": [] }, {}],
1402                    "parameters": [
1403                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } }
1404                    ],
1405                    "responses": {
1406                        "200": {
1407                            "description": "Document soft-deleted; report counts chunks tombstoned.",
1408                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ForgetDocumentReport" } } }
1409                        },
1410                        "400": { "description": "Malformed id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1411                        "404": { "description": "No such document.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1412                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1413                    }
1414                }
1415            },
1416            "/v1/graph/expand": {
1417                "get": {
1418                    "summary": "Expand one graph node",
1419                    "description": "Return neighboring nodes and edges for one graph node id. Powers solo-web graph expansion.",
1420                    "security": [{ "bearerAuth": [] }, {}],
1421                    "parameters": [
1422                        { "name": "node_id", "in": "query", "required": true, "schema": { "type": "string" } },
1423                        { "name": "kind", "in": "query", "required": true, "schema": { "type": "string", "enum": ["cluster_member", "document_chunk", "triple", "semantic"] } },
1424                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 25 } }
1425                    ],
1426                    "responses": {
1427                        "200": { "description": "Expanded graph neighborhood.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphResponse" } } } },
1428                        "400": { "description": "Bad graph query.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1429                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1430                        "404": { "description": "Tenant or node not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1431                    }
1432                }
1433            },
1434            "/v1/graph/nodes": {
1435                "get": {
1436                    "summary": "List graph nodes",
1437                    "description": "Paginated graph-node catalog used by solo-web's initial render.",
1438                    "security": [{ "bearerAuth": [] }, {}],
1439                    "parameters": [
1440                        { "name": "kind", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Comma-separated node kinds, e.g. episode,document,entity." },
1441                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 50 } },
1442                        { "name": "cursor", "in": "query", "required": false, "schema": { "type": "string" } },
1443                        { "name": "since_ms", "in": "query", "required": false, "schema": { "type": "integer", "format": "int64" } },
1444                        { "name": "until_ms", "in": "query", "required": false, "schema": { "type": "integer", "format": "int64" } }
1445                    ],
1446                    "responses": {
1447                        "200": { "description": "Page of graph nodes.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphNodesResponse" } } } },
1448                        "400": { "description": "Bad graph query.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1449                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1450                        "404": { "description": "Tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1451                    }
1452                }
1453            },
1454            "/v1/graph/edges": {
1455                "get": {
1456                    "summary": "List graph edges",
1457                    "description": "Paginated graph-edge catalog for explicit graph relations. Semantic HNSW edges are exposed through /v1/graph/neighbors/{id}.",
1458                    "security": [{ "bearerAuth": [] }, {}],
1459                    "parameters": [
1460                        { "name": "type", "in": "query", "required": false, "schema": { "type": "string" } },
1461                        { "name": "node_id", "in": "query", "required": false, "schema": { "type": "string" } },
1462                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 50 } },
1463                        { "name": "cursor", "in": "query", "required": false, "schema": { "type": "string" } }
1464                    ],
1465                    "responses": {
1466                        "200": { "description": "Page of graph edges.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphEdgesResponse" } } } },
1467                        "400": { "description": "Bad graph query.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1468                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1469                        "404": { "description": "Tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1470                    }
1471                }
1472            },
1473            "/v1/graph/inspect/{id}": {
1474                "get": {
1475                    "summary": "Inspect one graph node",
1476                    "description": "Kind-discriminated full-record drill for solo-web's inspector panel.",
1477                    "security": [{ "bearerAuth": [] }, {}],
1478                    "parameters": [
1479                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string" } }
1480                    ],
1481                    "responses": {
1482                        "200": { "description": "Graph node inspection payload.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphInspectResponse" } } } },
1483                        "400": { "description": "Bad graph node id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1484                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1485                        "404": { "description": "Tenant or graph node not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1486                    }
1487                }
1488            },
1489            "/v1/graph/neighbors/{id}": {
1490                "get": {
1491                    "summary": "List graph neighbors",
1492                    "description": "Unified explicit and semantic neighbor lookup for solo-web's show-similar overlay.",
1493                    "security": [{ "bearerAuth": [] }, {}],
1494                    "parameters": [
1495                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string" } },
1496                        { "name": "kind", "in": "query", "required": false, "schema": { "type": "string", "enum": ["explicit", "semantic", "both"] } },
1497                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 25 } }
1498                    ],
1499                    "responses": {
1500                        "200": { "description": "Neighbor graph.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GraphResponse" } } } },
1501                        "400": { "description": "Bad graph query.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1502                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1503                        "404": { "description": "Tenant or graph node not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1504                    }
1505                }
1506            },
1507            "/v1/graph/stream": {
1508                "get": {
1509                    "summary": "Stream graph invalidations",
1510                    "description": "Server-Sent Events stream of graph-data invalidation notifications. Clients refetch affected pages on each event.",
1511                    "security": [{ "bearerAuth": [] }, {}],
1512                    "responses": {
1513                        "200": { "description": "SSE stream.", "content": { "text/event-stream": { "schema": { "type": "string" } } } },
1514                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1515                        "404": { "description": "Tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1516                    }
1517                }
1518            },
1519            "/v1/status": {
1520                "get": {
1521                    "summary": "Authenticated Solo status",
1522                    "description": "Tenant-aware readiness payload for local UIs and agent bridges. Unlike public /health, this resolves auth and tenant routing.",
1523                    "security": [{ "bearerAuth": [] }, {}],
1524                    "parameters": [
1525                        { "name": "X-Solo-Tenant", "in": "header", "required": false, "schema": { "type": "string" } }
1526                    ],
1527                    "responses": {
1528                        "200": { "description": "Solo status payload.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/StatusResponse" } } } },
1529                        "400": { "description": "Invalid tenant header.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1530                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1531                        "404": { "description": "Tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1532                    }
1533                }
1534            },
1535            "/v1/tenants": {
1536                "get": {
1537                    "summary": "List visible tenants",
1538                    "description": "Principal-scoped active tenant list for solo-web's tenant picker and status UI.",
1539                    "security": [{ "bearerAuth": [] }, {}],
1540                    "responses": {
1541                        "200": {
1542                            "description": "Visible tenants.",
1543                            "headers": {
1544                                "X-Solo-Tenants-Count-Cap-Reached": {
1545                                    "schema": { "type": "string", "enum": ["true"] },
1546                                    "description": "Present when episode_count hydration was capped."
1547                                }
1548                            },
1549                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TenantsListResponse" } } }
1550                        },
1551                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1552                    }
1553                }
1554            },
1555            "/mcp": {
1556                "post": {
1557                    "summary": "MCP JSON-RPC request",
1558                    "description": "Streamable HTTP MCP request/response endpoint. A POST without Mcp-Session-Id creates a session and echoes it in the response header.",
1559                    "security": [{ "bearerAuth": [] }, {}],
1560                    "parameters": [
1561                        { "name": "Mcp-Session-Id", "in": "header", "required": false, "schema": { "type": "string" } },
1562                        { "name": "X-Solo-Tenant", "in": "header", "required": false, "schema": { "type": "string" } }
1563                    ],
1564                    "requestBody": {
1565                        "required": true,
1566                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/JsonRpcRequest" } } }
1567                    },
1568                    "responses": {
1569                        "200": {
1570                            "description": "JSON-RPC success or in-body error response.",
1571                            "headers": { "Mcp-Session-Id": { "schema": { "type": "string" } } },
1572                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/JsonRpcResponse" } } }
1573                        },
1574                        "202": { "description": "JSON-RPC notification accepted; no response body." },
1575                        "400": { "description": "Malformed JSON-RPC envelope or invalid tenant header.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1576                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1577                        "404": { "description": "Unknown tenant or unknown/expired MCP session.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1578                    }
1579                },
1580                "get": {
1581                    "summary": "MCP SSE stream",
1582                    "description": "Attach to an existing MCP session's resumable Server-Sent Events stream. Requires Mcp-Session-Id from a prior POST.",
1583                    "security": [{ "bearerAuth": [] }, {}],
1584                    "parameters": [
1585                        { "name": "Mcp-Session-Id", "in": "header", "required": true, "schema": { "type": "string" } },
1586                        { "name": "Last-Event-ID", "in": "header", "required": false, "schema": { "type": "string" } },
1587                        { "name": "X-Solo-Tenant", "in": "header", "required": false, "schema": { "type": "string" } }
1588                    ],
1589                    "responses": {
1590                        "200": { "description": "SSE stream.", "content": { "text/event-stream": { "schema": { "type": "string" } } } },
1591                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
1592                        "404": { "description": "Missing, unknown, or expired MCP session; or tenant not found.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } }
1593                    }
1594                }
1595            }
1596        }
1597    })
1598}
1599
1600// ---------------------------------------------------------------------------
1601// Handlers
1602// ---------------------------------------------------------------------------
1603
/// JSON request body accepted by `remember_handler`.
#[derive(Debug, Deserialize)]
struct RememberBody {
    /// Text to store. Required; the handler strips trailing whitespace and
    /// rejects the request when nothing is left.
    content: String,
    /// Optional origin label. When absent the handler uses `"user_message"`.
    #[serde(default)]
    source_type: Option<String>,
    /// Optional caller-supplied source identifier, copied onto the episode.
    #[serde(default)]
    source_id: Option<String>,
}
1612
/// JSON response produced by `remember_handler`.
#[derive(Debug, Serialize)]
struct RememberResponse {
    /// String form of the memory id returned by the tenant writer.
    memory_id: String,
}
1617
1618async fn remember_handler(
1619    TenantExtractor(tenant): TenantExtractor,
1620    AuditPrincipal(principal): AuditPrincipal,
1621    Json(body): Json<RememberBody>,
1622) -> Result<Json<RememberResponse>, ApiError> {
1623    let content = body.content.trim_end().to_string();
1624    if content.is_empty() {
1625        return Err(ApiError::bad_request("content must not be empty"));
1626    }
1627    let embedding = tenant
1628        .embedder()
1629        .embed(&content)
1630        .await
1631        .map_err(ApiError::from)?;
1632    let episode = Episode {
1633        memory_id: MemoryId::new(),
1634        ts_ms: chrono::Utc::now().timestamp_millis(),
1635        source_type: body.source_type.unwrap_or_else(|| "user_message".into()),
1636        source_id: body.source_id,
1637        content,
1638        encoding_context: EncodingContext::default(),
1639        provenance: None,
1640        confidence: Confidence::new(0.9).unwrap(),
1641        strength: 0.5,
1642        salience: 0.5,
1643        tier: Tier::Hot,
1644    };
1645    let mid = tenant
1646        .write()
1647        .remember_as(principal, episode, embedding)
1648        .await
1649        .map_err(ApiError::from)?;
1650    Ok(Json(RememberResponse {
1651        memory_id: mid.to_string(),
1652    }))
1653}
1654
/// JSON request body accepted by `recall_handler`.
#[derive(Debug, Deserialize)]
struct RecallBody {
    /// Search text, forwarded unchanged to `solo_query::run_recall`.
    query: String,
    /// Maximum number of hits; filled in by `default_limit` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1661
/// JSON request body accepted by `memory_context_handler`. Every field is
/// forwarded to `solo_query::memory_context`.
#[derive(Debug, Deserialize)]
struct MemoryContextBody {
    /// Search text. Required.
    query: String,
    /// Optional subject; passed on as `Option<&str>`.
    #[serde(default)]
    subject: Option<String>,
    /// Optional window size in days; passed on unchanged.
    #[serde(default)]
    window_days: Option<i64>,
    /// Result limit; filled in by `default_limit` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1672
/// Serde default for the `limit` field of the request/query structs in
/// this file that reference it via `#[serde(default = "default_limit")]`.
fn default_limit() -> usize {
    const FALLBACK_LIMIT: usize = 5;
    FALLBACK_LIMIT
}
1676
1677async fn recall_handler(
1678    TenantExtractor(tenant): TenantExtractor,
1679    AuditPrincipal(principal): AuditPrincipal,
1680    Json(body): Json<RecallBody>,
1681) -> Result<Json<solo_query::RecallResult>, ApiError> {
1682    // solo_query::run_recall handles empty-query rejection (returns
1683    // InvalidInput → ApiError::bad_request(400)) and clamps limit
1684    // upstream of the embedder call.
1685    let result = solo_query::run_recall(tenant.as_ref(), principal, &body.query, body.limit)
1686        .await
1687        .map_err(ApiError::from)?;
1688    Ok(Json(result))
1689}
1690
1691async fn memory_context_handler(
1692    State(s): State<SoloHttpState>,
1693    TenantExtractor(tenant): TenantExtractor,
1694    AuditPrincipal(principal): AuditPrincipal,
1695    Json(body): Json<MemoryContextBody>,
1696) -> Result<Json<solo_query::MemoryContextResult>, ApiError> {
1697    let result = solo_query::memory_context(
1698        tenant.as_ref(),
1699        principal,
1700        &body.query,
1701        body.subject.as_deref(),
1702        &s.user_aliases,
1703        body.window_days,
1704        body.limit,
1705    )
1706    .await
1707    .map_err(ApiError::from)?;
1708    Ok(Json(result))
1709}
1710
1711async fn inspect_handler(
1712    TenantExtractor(tenant): TenantExtractor,
1713    AuditPrincipal(principal): AuditPrincipal,
1714    Path(id): Path<String>,
1715) -> Result<Json<solo_query::EpisodeRecord>, ApiError> {
1716    let mid =
1717        MemoryId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1718    let row = solo_query::inspect_one(tenant.read(), tenant.audit(), principal, mid)
1719        .await
1720        .map_err(ApiError::from)?;
1721    Ok(Json(row))
1722}
1723
/// JSON request body accepted by `update_handler`.
#[derive(Debug, Deserialize)]
struct MemoryUpdateBody {
    /// Replacement text. The handler rejects values that are empty or
    /// whitespace-only and otherwise forwards the string untrimmed.
    content: String,
}
1728
1729async fn update_handler(
1730    TenantExtractor(tenant): TenantExtractor,
1731    AuditPrincipal(principal): AuditPrincipal,
1732    Path(id): Path<String>,
1733    Json(body): Json<MemoryUpdateBody>,
1734) -> Result<Json<solo_query::MemoryUpdateResult>, ApiError> {
1735    let mid =
1736        MemoryId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1737    if body.content.trim().is_empty() {
1738        return Err(ApiError::bad_request("content must not be empty"));
1739    }
1740    let result = solo_query::memory_update(tenant.as_ref(), principal, mid, &body.content)
1741        .await
1742        .map_err(ApiError::from)?;
1743    Ok(Json(result))
1744}
1745
1746// Path 1 derived-layer handlers (v0.4.0+). Read handlers are GET-shaped:
1747// pure read-only queries against the Steward's outputs, query-string
1748// params for simple filters. Each handler delegates to a single
1749// solo_query::derived pipeline and returns the result Vec as JSON.
1750// Empty derived layer → 200 with `[]` body (parseable JSON array).
1751
/// Query-string parameters accepted by `themes_handler`.
#[derive(Debug, Deserialize)]
struct ThemesQuery {
    /// Optional window size in days, forwarded to `solo_query::themes`.
    #[serde(default)]
    window_days: Option<i64>,
    /// Result limit; filled in by `default_limit` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1759
1760async fn themes_handler(
1761    TenantExtractor(tenant): TenantExtractor,
1762    AuditPrincipal(principal): AuditPrincipal,
1763    Query(q): Query<ThemesQuery>,
1764) -> Result<Json<Vec<solo_query::ThemeHit>>, ApiError> {
1765    let hits = solo_query::themes(
1766        tenant.read(),
1767        tenant.audit(),
1768        principal,
1769        q.window_days,
1770        q.limit,
1771    )
1772    .await
1773    .map_err(ApiError::from)?;
1774    Ok(Json(hits))
1775}
1776
/// Query-string parameters accepted by `facts_about_handler`. All fields
/// are forwarded to `solo_query::facts_about`.
#[derive(Debug, Deserialize)]
struct FactsAboutQuery {
    /// Subject to look up. Required; the handler rejects blank values.
    subject: String,
    /// Optional predicate filter.
    #[serde(default)]
    predicate: Option<String>,
    /// Optional lower time bound in milliseconds.
    #[serde(default)]
    since_ms: Option<i64>,
    /// Optional upper time bound in milliseconds.
    #[serde(default)]
    until_ms: Option<i64>,
    /// v0.5.1 Priority 8 — widen the query to also match rows where
    /// `subject` appears as the object. Default `false`.
    #[serde(default)]
    include_as_object: bool,
    /// Result limit; filled in by `default_limit` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1793
1794async fn facts_about_handler(
1795    State(s): State<SoloHttpState>,
1796    TenantExtractor(tenant): TenantExtractor,
1797    AuditPrincipal(principal): AuditPrincipal,
1798    Query(q): Query<FactsAboutQuery>,
1799) -> Result<Json<Vec<solo_query::FactHit>>, ApiError> {
1800    if q.subject.trim().is_empty() {
1801        return Err(ApiError::bad_request("subject must not be empty"));
1802    }
1803    let hits = solo_query::facts_about(
1804        tenant.read(),
1805        tenant.audit(),
1806        principal,
1807        &q.subject,
1808        &s.user_aliases,
1809        q.include_as_object,
1810        q.predicate.as_deref(),
1811        q.since_ms,
1812        q.until_ms,
1813        q.limit,
1814    )
1815    .await
1816    .map_err(ApiError::from)?;
1817    Ok(Json(hits))
1818}
1819
/// Query-string parameters accepted by `entities_handler`.
#[derive(Debug, Deserialize)]
struct EntitiesQuery {
    /// Search text. Required; the handler rejects blank values.
    query: String,
    /// Result limit; filled in by `default_limit` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1826
1827async fn entities_handler(
1828    TenantExtractor(tenant): TenantExtractor,
1829    AuditPrincipal(principal): AuditPrincipal,
1830    Query(q): Query<EntitiesQuery>,
1831) -> Result<Json<Vec<solo_query::EntityHit>>, ApiError> {
1832    if q.query.trim().is_empty() {
1833        return Err(ApiError::bad_request("query must not be empty"));
1834    }
1835    let hits = solo_query::entities(tenant.read(), tenant.audit(), principal, &q.query, q.limit)
1836        .await
1837        .map_err(ApiError::from)?;
1838    Ok(Json(hits))
1839}
1840
/// Query-string parameters accepted by `contradictions_handler`.
#[derive(Debug, Deserialize)]
struct ContradictionsQuery {
    /// Result limit; filled in by `default_limit` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1846
1847async fn contradictions_handler(
1848    TenantExtractor(tenant): TenantExtractor,
1849    AuditPrincipal(principal): AuditPrincipal,
1850    Query(q): Query<ContradictionsQuery>,
1851) -> Result<Json<Vec<solo_query::ContradictionHit>>, ApiError> {
1852    let hits = solo_query::contradictions(tenant.read(), tenant.audit(), principal, q.limit)
1853        .await
1854        .map_err(ApiError::from)?;
1855    Ok(Json(hits))
1856}
1857
/// Serde default for `ContradictionResolveBody::status`: a resolve request
/// that omits `status` is treated as `"resolved"`.
fn default_contradiction_status() -> String {
    String::from("resolved")
}
1861
/// JSON request body accepted by `contradiction_resolve_handler`. All
/// fields are forwarded to `solo_query::resolve_contradiction`.
#[derive(Debug, Deserialize)]
struct ContradictionResolveBody {
    /// Identifier of the first side of the contradiction. Must not be blank.
    a_id: String,
    /// Identifier of the second side of the contradiction. Must not be blank.
    b_id: String,
    /// Contradiction kind. Must not be blank.
    kind: String,
    /// New status; filled in by `default_contradiction_status` when omitted.
    #[serde(default = "default_contradiction_status")]
    status: String,
    /// Optional free-text note, passed on as `Option<&str>`.
    #[serde(default)]
    resolution_note: Option<String>,
    /// Optional id of the winning triple, passed on as `Option<&str>`.
    #[serde(default)]
    winning_triple_id: Option<String>,
}
1874
1875async fn contradiction_resolve_handler(
1876    TenantExtractor(tenant): TenantExtractor,
1877    AuditPrincipal(principal): AuditPrincipal,
1878    Json(body): Json<ContradictionResolveBody>,
1879) -> Result<Json<solo_query::ContradictionResolution>, ApiError> {
1880    if body.a_id.trim().is_empty() || body.b_id.trim().is_empty() || body.kind.trim().is_empty() {
1881        return Err(ApiError::bad_request(
1882            "a_id, b_id, and kind must not be empty",
1883        ));
1884    }
1885    let result = solo_query::resolve_contradiction(
1886        tenant.read(),
1887        tenant.audit(),
1888        principal,
1889        &body.a_id,
1890        &body.b_id,
1891        &body.kind,
1892        &body.status,
1893        body.resolution_note.as_deref(),
1894        body.winning_triple_id.as_deref(),
1895    )
1896    .await
1897    .map_err(ApiError::from)?;
1898    Ok(Json(result))
1899}
1900
/// Query-string parameters accepted by `inspect_cluster_handler`.
#[derive(Debug, Deserialize, Default)]
struct InspectClusterQuery {
    /// Default `false` — episode `content` is truncated to
    /// `solo_query::EPISODE_TRUNCATE_CHARS` chars with a trailing `…`.
    /// `?full_content=true` returns each episode's content verbatim.
    #[serde(default)]
    full_content: bool,
}
1909
1910async fn inspect_cluster_handler(
1911    TenantExtractor(tenant): TenantExtractor,
1912    AuditPrincipal(principal): AuditPrincipal,
1913    Path(cluster_id): Path<String>,
1914    Query(q): Query<InspectClusterQuery>,
1915) -> Result<Json<solo_query::ClusterRecord>, ApiError> {
1916    if cluster_id.trim().is_empty() {
1917        return Err(ApiError::bad_request("cluster_id must not be empty"));
1918    }
1919    let record = solo_query::inspect_cluster(
1920        tenant.read(),
1921        tenant.audit(),
1922        principal,
1923        &cluster_id,
1924        q.full_content,
1925    )
1926    .await
1927    .map_err(ApiError::from)?;
1928    Ok(Json(record))
1929}
1930
1931// ---------------------------------------------------------------------------
1932// Document handlers (v0.7.0 P6)
1933// ---------------------------------------------------------------------------
1934
/// JSON request body accepted by `ingest_document_handler`.
#[derive(Debug, Deserialize)]
struct IngestDocumentBody {
    /// Server-side absolute path to the file. Must be readable by the
    /// Solo process. The writer reads, parses, chunks, and embeds.
    /// NOTE(review): the handler only rejects blank values; it does not
    /// itself check that the path is absolute.
    path: String,
}
1941
1942async fn ingest_document_handler(
1943    TenantExtractor(tenant): TenantExtractor,
1944    AuditPrincipal(principal): AuditPrincipal,
1945    Json(body): Json<IngestDocumentBody>,
1946) -> Result<Json<solo_storage::IngestReport>, ApiError> {
1947    if body.path.trim().is_empty() {
1948        return Err(ApiError::bad_request("path must not be empty"));
1949    }
1950    let path = std::path::PathBuf::from(body.path);
1951    let chunk_config = solo_storage::document::ChunkConfig::default();
1952    let report = tenant
1953        .write()
1954        .ingest_document_as(principal, path, chunk_config)
1955        .await
1956        .map_err(ApiError::from)?;
1957    Ok(Json(report))
1958}
1959
/// JSON request body accepted by `search_docs_handler`.
#[derive(Debug, Deserialize)]
struct SearchDocsBody {
    /// Search text, forwarded unchanged to `solo_query::run_doc_search`.
    query: String,
    /// Result limit; filled in by `default_limit` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1966
1967async fn search_docs_handler(
1968    TenantExtractor(tenant): TenantExtractor,
1969    AuditPrincipal(principal): AuditPrincipal,
1970    Json(body): Json<SearchDocsBody>,
1971) -> Result<Json<Vec<solo_query::DocSearchHit>>, ApiError> {
1972    let hits = solo_query::run_doc_search(tenant.as_ref(), principal, &body.query, body.limit)
1973        .await
1974        .map_err(ApiError::from)?;
1975    Ok(Json(hits))
1976}
1977
1978async fn inspect_document_handler(
1979    TenantExtractor(tenant): TenantExtractor,
1980    AuditPrincipal(principal): AuditPrincipal,
1981    Path(id): Path<String>,
1982) -> Result<Json<solo_query::DocumentInspectResult>, ApiError> {
1983    let doc_id =
1984        DocumentId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1985    let result_opt =
1986        solo_query::inspect_document(tenant.read(), tenant.audit(), principal, &doc_id)
1987            .await
1988            .map_err(ApiError::from)?;
1989    match result_opt {
1990        Some(record) => Ok(Json(record)),
1991        None => Err(ApiError::not_found(format!("document {doc_id} not found"))),
1992    }
1993}
1994
/// Query-string parameters accepted by `list_documents_handler`. All
/// fields are forwarded to `solo_query::list_documents`.
#[derive(Debug, Deserialize)]
struct ListDocumentsQuery {
    /// Page size; filled in by `default_list_documents_limit` when omitted.
    #[serde(default = "default_list_documents_limit")]
    limit: usize,
    /// Number of rows to skip; `0` when omitted.
    #[serde(default)]
    offset: usize,
    /// Whether forgotten documents are included; `false` when omitted.
    #[serde(default)]
    include_forgotten: bool,
}
2004
/// Serde default for `ListDocumentsQuery::limit`.
fn default_list_documents_limit() -> usize {
    const FALLBACK_PAGE_SIZE: usize = 20;
    FALLBACK_PAGE_SIZE
}
2008
2009async fn list_documents_handler(
2010    TenantExtractor(tenant): TenantExtractor,
2011    AuditPrincipal(principal): AuditPrincipal,
2012    Query(q): Query<ListDocumentsQuery>,
2013) -> Result<Json<Vec<solo_query::DocumentSummary>>, ApiError> {
2014    let rows = solo_query::list_documents(
2015        tenant.read(),
2016        tenant.audit(),
2017        principal,
2018        q.limit,
2019        q.offset,
2020        q.include_forgotten,
2021    )
2022    .await
2023    .map_err(ApiError::from)?;
2024    Ok(Json(rows))
2025}
2026
2027async fn forget_document_handler(
2028    TenantExtractor(tenant): TenantExtractor,
2029    AuditPrincipal(principal): AuditPrincipal,
2030    Path(id): Path<String>,
2031) -> Result<Json<solo_storage::ForgetDocumentReport>, ApiError> {
2032    let doc_id =
2033        DocumentId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
2034    let report = tenant
2035        .write()
2036        .forget_document_as(principal, doc_id)
2037        .await
2038        .map_err(ApiError::from)?;
2039    Ok(Json(report))
2040}
2041
/// Query-string parameters accepted by `forget_handler`.
#[derive(Debug, Deserialize)]
struct ForgetQuery {
    /// Optional reason recorded with the forget operation. The handler
    /// substitutes `"http"` when it is absent.
    #[serde(default)]
    reason: Option<String>,
}
2047
2048async fn forget_handler(
2049    TenantExtractor(tenant): TenantExtractor,
2050    AuditPrincipal(principal): AuditPrincipal,
2051    Path(id): Path<String>,
2052    Query(q): Query<ForgetQuery>,
2053) -> Result<StatusCode, ApiError> {
2054    let mid =
2055        MemoryId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
2056    let reason = q.reason.unwrap_or_else(|| "http".into());
2057    tenant
2058        .write()
2059        .forget_as(principal, mid, reason)
2060        .await
2061        .map_err(ApiError::from)?;
2062    Ok(StatusCode::NO_CONTENT)
2063}
2064
2065async fn consolidate_handler(
2066    TenantExtractor(tenant): TenantExtractor,
2067    AuditPrincipal(principal): AuditPrincipal,
2068    body: axum::body::Bytes,
2069) -> Result<Json<solo_storage::ConsolidationReport>, ApiError> {
2070    // Empty body = default scope (unbounded window). We parse via
2071    // `Bytes` rather than `Option<Json<T>>` because axum's `Json`
2072    // extractor 400s on an empty body when Content-Type is JSON
2073    // (it can't deserialize zero bytes as `T`), and the `Option`
2074    // wrapper doesn't reliably degrade that failure to `None`.
2075    let scope = if body.is_empty() {
2076        solo_storage::ConsolidationScope::default()
2077    } else {
2078        serde_json::from_slice(&body)
2079            .map_err(|e| ApiError::bad_request(format!("invalid JSON: {e}")))?
2080    };
2081    let report = tenant
2082        .write()
2083        .consolidate_as(principal, scope)
2084        .await
2085        .map_err(ApiError::from)?;
2086    Ok(Json(report))
2087}
2088
/// JSON request body accepted by `backup_handler`.
#[derive(Debug, Deserialize)]
struct BackupBody {
    /// Server-side absolute path where the backup file should be
    /// written. Must be writable by the Solo process. Refuses to
    /// overwrite an existing file unless `force = true`.
    to: String,
    /// Allow replacing an existing destination file; `false` when omitted.
    #[serde(default)]
    force: bool,
}
2098
/// JSON response produced by `backup_handler`.
#[derive(Debug, Serialize)]
struct BackupResponse {
    /// Destination path as rendered by `Path::display`.
    path: String,
    /// Wall-clock time spent in the writer's backup call, in milliseconds.
    elapsed_ms: u64,
}
2104
2105async fn backup_handler(
2106    TenantExtractor(tenant): TenantExtractor,
2107    Json(body): Json<BackupBody>,
2108) -> Result<Json<BackupResponse>, ApiError> {
2109    use std::path::PathBuf;
2110
2111    let dest = PathBuf::from(&body.to);
2112    if dest.as_os_str().is_empty() {
2113        return Err(ApiError::bad_request("`to` must not be empty"));
2114    }
2115    // CRITICAL ORDER: same-file refusal MUST come BEFORE `remove_file`.
2116    // The tenant's source DB path comes from the resolved TenantHandle.
2117    if solo_storage::paths_refer_to_same_file(tenant.db_path(), &dest) {
2118        return Err(ApiError::bad_request(format!(
2119            "destination {} is the same file as the source database; \
2120             refusing to run (would corrupt the live database)",
2121            dest.display()
2122        )));
2123    }
2124    if dest.exists() {
2125        if !body.force {
2126            return Err(ApiError::bad_request(format!(
2127                "destination {} exists; pass force=true to overwrite",
2128                dest.display()
2129            )));
2130        }
2131        std::fs::remove_file(&dest).map_err(|e| {
2132            ApiError::internal(format!(
2133                "remove existing destination {}: {e}",
2134                dest.display()
2135            ))
2136        })?;
2137    }
2138    if let Some(parent) = dest.parent() {
2139        if !parent.as_os_str().is_empty() && !parent.is_dir() {
2140            return Err(ApiError::bad_request(format!(
2141                "destination parent directory {} does not exist",
2142                parent.display()
2143            )));
2144        }
2145    }
2146
2147    let started = std::time::Instant::now();
2148    tenant
2149        .write()
2150        .backup(dest.clone())
2151        .await
2152        .map_err(ApiError::from)?;
2153    let elapsed_ms = started.elapsed().as_millis() as u64;
2154
2155    Ok(Json(BackupResponse {
2156        path: dest.display().to_string(),
2157        elapsed_ms,
2158    }))
2159}
2160
2161// ---------------------------------------------------------------------------
2162// Graph expand (v0.9.x — first /v1/graph/* endpoint for solo-web)
2163// ---------------------------------------------------------------------------
2164//
2165// `GET /v1/graph/expand?node_id=...&kind=...&limit=N` — read-only neighbor
2166// drill off any node. Supports four edge kinds:
2167//   * `cluster_member` — episodes ↔ clusters via `cluster_episodes`.
2168//   * `document_chunk` — documents ↔ chunks via `document_chunks.doc_id`.
2169//   * `triple`         — episodes ↔ entities via `triples` (subject_id /
2170//     object_id / source_episode_id added in migration 0007).
2171//   * `semantic`       — HNSW top-K similar episodes (re-embeds the source
2172//     episode's content via the tenant embedder, then calls the same
2173//     pipeline as `/memory/search`; cheaper than a separate embeddings-
2174//     table fetch path and reuses one well-tested code path).
2175//
2176// **Node-id prefix convention** (locked in this PR; the future
2177// `/v1/graph/nodes` + `/v1/graph/inspect/:id` endpoints will use the
2178// same scheme):
2179//   * `ep:<memory_id>`     — episode (memory_id = UUID v7)
2180//   * `doc:<doc_id>`       — document (doc_id   = UUID v7)
2181//   * `chunk:<chunk_id>`   — chunk    (chunk_id = UUID v7)
2182//   * `cl:<cluster_id>`    — cluster
2183//   * `ent:<value>`        — entity (synthetic — minted from a triple's
2184//     subject_id / object_id; value is the raw string verbatim, no
2185//     URL-encoding — `:` and other punctuation appear in real entity
2186//     ids in the wild).
2187//
2188// Entity nodes are synthetic: there's no `entities` table. They're derived
2189// on-the-fly from triples and only exist in the wire format. Two entity
2190// nodes with the same `ent:<value>` are the same node.
2191//
2192// **Read-only**: no audit emit (lesson #30 — graph expand is a derived view
2193// over already-audited primitives; the explicit-query audit events from
2194// `memory.recall` / `memory.inspect` / `memory.facts_about` cover the
2195// underlying reads).
2196//
2197// Tests live inline in `handler_tests` below.
2198
/// Neighbor limit `graph_expand_handler` uses when the request has no
/// `limit` parameter.
const GRAPH_EXPAND_DEFAULT_LIMIT: u32 = 25;
/// Upper bound `graph_expand_handler` clamps the requested `limit` to.
const GRAPH_EXPAND_MAX_LIMIT: u32 = 100;
2201
/// Edge-kind discriminator. Drives which expansion path runs and what edge
/// kind appears in the response.
///
/// Deserialized from the `kind` query parameter using snake_case names
/// (`cluster_member`, `document_chunk`, `triple`, `semantic`).
#[derive(Debug, Clone, Copy, Deserialize)]
#[serde(rename_all = "snake_case")]
enum GraphExpandKind {
    /// Episodes ↔ clusters; dispatched to `expand_cluster_member`.
    ClusterMember,
    /// Documents ↔ chunks; dispatched to `expand_document_chunk`.
    DocumentChunk,
    /// Episodes ↔ entities via triples; dispatched to `expand_triple`.
    Triple,
    /// Similar episodes; dispatched to `expand_semantic`.
    Semantic,
}
2212
/// Query-string parameters accepted by `graph_expand_handler`.
#[derive(Debug, Deserialize)]
struct GraphExpandQuery {
    /// Prefixed node id (`<prefix>:<value>`); parsed by `parse_node_id`.
    node_id: String,
    /// Which kind of edge to expand along.
    kind: GraphExpandKind,
    /// Optional neighbor limit. The handler substitutes
    /// `GRAPH_EXPAND_DEFAULT_LIMIT` when absent and clamps the value to
    /// `1..=GRAPH_EXPAND_MAX_LIMIT`.
    #[serde(default)]
    limit: Option<u32>,
}
2220
/// Source-node kind, derived from the `node_id` prefix.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NodeKind {
    /// `ep:` prefix.
    Episode,
    /// `doc:` prefix.
    Document,
    /// `chunk:` prefix.
    Chunk,
    /// `cl:` prefix.
    Cluster,
    /// `ent:` prefix.
    Entity,
}
2230
2231impl NodeKind {
2232    fn as_wire_str(self) -> &'static str {
2233        match self {
2234            Self::Episode => "episode",
2235            Self::Document => "document",
2236            Self::Chunk => "chunk",
2237            Self::Cluster => "cluster",
2238            Self::Entity => "entity",
2239        }
2240    }
2241}
2242
2243/// Decompose `<prefix>:<value>` into (kind, raw value). Returns 400 on
2244/// unknown prefix / empty value / no `:`.
2245fn parse_node_id(raw: &str) -> Result<(NodeKind, &str), ApiError> {
2246    let (prefix, value) = raw.split_once(':').ok_or_else(|| {
2247        ApiError::bad_request(format!(
2248            "node_id must be `<prefix>:<value>` (one of ep:/doc:/chunk:/cl:/ent:); got {raw:?}"
2249        ))
2250    })?;
2251    if value.is_empty() {
2252        return Err(ApiError::bad_request(format!(
2253            "node_id value is empty after prefix: {raw:?}"
2254        )));
2255    }
2256    let kind = match prefix {
2257        "ep" => NodeKind::Episode,
2258        "doc" => NodeKind::Document,
2259        "chunk" => NodeKind::Chunk,
2260        "cl" => NodeKind::Cluster,
2261        "ent" => NodeKind::Entity,
2262        other => {
2263            return Err(ApiError::bad_request(format!(
2264                "unknown node_id prefix {other:?}; expected one of ep:/doc:/chunk:/cl:/ent:"
2265            )));
2266        }
2267    };
2268    Ok((kind, value))
2269}
2270
/// One node in the graph-expand response. Mirrors solo-web's `GraphNode`
/// TS interface (see `solo-web/src/api/types.ts`).
#[derive(Debug, Serialize)]
struct GraphNode {
    /// Prefixed node id, e.g. `ep:<memory_id>` or `ent:<value>`.
    id: String,
    /// Wire name of the node kind (see `NodeKind::as_wire_str`).
    kind: &'static str,
    /// Short display label.
    label: String,
    /// Timestamp in milliseconds; left out of the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    ts_ms: Option<i64>,
    /// Id of the tenant the node belongs to.
    tenant_id: String,
    /// Longer preview text; left out of the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    preview: Option<String>,
}
2284
/// One edge. Mirrors `GraphEdge` in solo-web TS types. `id` is a composite
/// `${source}--${kind}--${target}` so the renderer can dedupe.
#[derive(Debug, Serialize)]
struct GraphEdge {
    /// Composite edge id (see `edge_id`).
    id: String,
    /// Node id the edge starts at.
    source: String,
    /// Node id the edge points to.
    target: String,
    /// Edge kind as a static wire string.
    kind: &'static str,
    /// Optional predicate; left out of the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    predicate: Option<String>,
    /// Optional weight; left out of the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    weight: Option<f32>,
}
2298
/// JSON response body of `graph_expand_handler`.
#[derive(Debug, Serialize)]
struct GraphExpandResponse {
    /// Nodes included in the expansion result.
    nodes: Vec<GraphNode>,
    /// Edges included in the expansion result.
    edges: Vec<GraphEdge>,
}
2304
/// Builds the composite edge id `<source>--<kind>--<target>`.
fn edge_id(source: &str, kind: &str, target: &str) -> String {
    [source, kind, target].join("--")
}
2308
/// Episode summary needed to mint a `GraphNode` from an episode row.
#[derive(Debug)]
struct ExpandedEpisode {
    /// Memory id; becomes the node id `ep:<memory_id>`.
    memory_id: String,
    /// Episode timestamp in milliseconds; copied to the node's `ts_ms`.
    ts_ms: i64,
    /// Full episode text; the node label and preview are cut from it.
    content: String,
}
2316
/// Document summary needed to mint a `GraphNode` from a document row.
#[derive(Debug)]
struct ExpandedDocument {
    /// Document id; becomes the node id `doc:<doc_id>`.
    doc_id: String,
    /// Optional title; first choice for the node label.
    title: Option<String>,
    /// Optional source; second choice for the label and the node preview.
    source: Option<String>,
    /// Ingestion timestamp in milliseconds; copied to the node's `ts_ms`.
    ingested_at_ms: i64,
}
2325
/// Chunk summary needed to mint a `GraphNode` from a chunk row.
#[derive(Debug)]
struct ExpandedChunk {
    /// Chunk id; becomes the node id `chunk:<chunk_id>`.
    chunk_id: String,
    /// Chunk index, shown in the node label as `chunk #<index>`.
    chunk_index: i64,
    /// Chunk text; the node label and preview are cut from it.
    content: String,
}
2333
/// Shortens `s` to at most `max` characters (Unicode scalar values, not
/// bytes).
///
/// Strings that already fit are returned unchanged. Longer strings keep
/// their first `max - 1` characters and get a trailing `…`, so the result
/// is exactly `max` characters long. `max == 0` yields an empty string;
/// the previous implementation computed `max - 1` in that case, which
/// underflows.
fn truncate_preview(s: &str, max: usize) -> String {
    if max == 0 {
        return String::new();
    }

    // One forward pass that stops as soon as the answer is known, instead
    // of counting every char and then iterating a second time.
    let mut char_starts = s.char_indices().map(|(offset, _)| offset);

    // Byte offset of the char at index `max - 1`, i.e. where the kept
    // prefix of `max - 1` chars ends.
    let cut = match char_starts.nth(max - 1) {
        Some(offset) => offset,
        // Fewer than `max` chars: fits as-is.
        None => return s.to_string(),
    };
    // Exactly `max` chars: also fits as-is.
    if char_starts.next().is_none() {
        return s.to_string();
    }

    let mut out = String::with_capacity(cut + '…'.len_utf8());
    out.push_str(&s[..cut]);
    out.push('…');
    out
}
2342
/// First-line label cap. Keeps payloads tight for the graph renderer
/// (labels are headings, not full content).
const GRAPH_LABEL_CHARS: usize = 80;
/// Character cap for a graph node's `preview` text.
const GRAPH_PREVIEW_CHARS: usize = 200;
2347
2348fn episode_label(content: &str) -> String {
2349    let first_line = content.lines().next().unwrap_or(content);
2350    truncate_preview(first_line, GRAPH_LABEL_CHARS)
2351}
2352
2353fn graph_node_for_episode(tenant_id: &str, ep: &ExpandedEpisode) -> GraphNode {
2354    GraphNode {
2355        id: format!("ep:{}", ep.memory_id),
2356        kind: NodeKind::Episode.as_wire_str(),
2357        label: episode_label(&ep.content),
2358        ts_ms: Some(ep.ts_ms),
2359        tenant_id: tenant_id.to_string(),
2360        preview: Some(truncate_preview(&ep.content, GRAPH_PREVIEW_CHARS)),
2361    }
2362}
2363
2364fn graph_node_for_document(tenant_id: &str, d: &ExpandedDocument) -> GraphNode {
2365    let label = d
2366        .title
2367        .clone()
2368        .or_else(|| d.source.clone())
2369        .unwrap_or_else(|| d.doc_id.clone());
2370    GraphNode {
2371        id: format!("doc:{}", d.doc_id),
2372        kind: NodeKind::Document.as_wire_str(),
2373        label: truncate_preview(&label, GRAPH_LABEL_CHARS),
2374        ts_ms: Some(d.ingested_at_ms),
2375        tenant_id: tenant_id.to_string(),
2376        preview: d.source.clone(),
2377    }
2378}
2379
2380fn graph_node_for_chunk(tenant_id: &str, c: &ExpandedChunk) -> GraphNode {
2381    GraphNode {
2382        id: format!("chunk:{}", c.chunk_id),
2383        kind: NodeKind::Chunk.as_wire_str(),
2384        label: format!("chunk #{}: {}", c.chunk_index, episode_label(&c.content)),
2385        ts_ms: None,
2386        tenant_id: tenant_id.to_string(),
2387        preview: Some(truncate_preview(&c.content, GRAPH_PREVIEW_CHARS)),
2388    }
2389}
2390
2391fn graph_node_for_cluster(
2392    tenant_id: &str,
2393    cluster_id: &str,
2394    abstraction: Option<&str>,
2395    created_at_ms: i64,
2396) -> GraphNode {
2397    let label = abstraction
2398        .map(|a| truncate_preview(a, GRAPH_LABEL_CHARS))
2399        .unwrap_or_else(|| format!("cluster {cluster_id}"));
2400    GraphNode {
2401        id: format!("cl:{cluster_id}"),
2402        kind: NodeKind::Cluster.as_wire_str(),
2403        label,
2404        ts_ms: Some(created_at_ms),
2405        tenant_id: tenant_id.to_string(),
2406        preview: abstraction.map(|a| truncate_preview(a, GRAPH_PREVIEW_CHARS)),
2407    }
2408}
2409
2410fn graph_node_for_entity(tenant_id: &str, value: &str) -> GraphNode {
2411    GraphNode {
2412        id: format!("ent:{value}"),
2413        kind: NodeKind::Entity.as_wire_str(),
2414        label: truncate_preview(value, GRAPH_LABEL_CHARS),
2415        ts_ms: None,
2416        tenant_id: tenant_id.to_string(),
2417        preview: None,
2418    }
2419}
2420
2421/// `GET /v1/graph/expand`. See module-level comments for the contract.
2422async fn graph_expand_handler(
2423    TenantExtractor(tenant): TenantExtractor,
2424    Query(q): Query<GraphExpandQuery>,
2425) -> Result<Json<GraphExpandResponse>, ApiError> {
2426    // Silent clamp at GRAPH_EXPAND_MAX_LIMIT — matches the rest of
2427    // solo-query's read pipelines (recall, themes, etc.). Documented in
2428    // the OpenAPI spec.
2429    let limit = q.limit.unwrap_or(GRAPH_EXPAND_DEFAULT_LIMIT);
2430    let limit = limit.clamp(1, GRAPH_EXPAND_MAX_LIMIT) as i64;
2431
2432    let (node_kind, value) = parse_node_id(&q.node_id)?;
2433    let value = value.to_string();
2434    let node_id_full = q.node_id.clone();
2435    let tenant_id_str = tenant.tenant_id().to_string();
2436
2437    match q.kind {
2438        GraphExpandKind::ClusterMember => {
2439            expand_cluster_member(
2440                &tenant,
2441                &tenant_id_str,
2442                node_kind,
2443                &value,
2444                &node_id_full,
2445                limit,
2446            )
2447            .await
2448        }
2449        GraphExpandKind::DocumentChunk => {
2450            expand_document_chunk(
2451                &tenant,
2452                &tenant_id_str,
2453                node_kind,
2454                &value,
2455                &node_id_full,
2456                limit,
2457            )
2458            .await
2459        }
2460        GraphExpandKind::Triple => {
2461            expand_triple(
2462                &tenant,
2463                &tenant_id_str,
2464                node_kind,
2465                &value,
2466                &node_id_full,
2467                limit,
2468            )
2469            .await
2470        }
2471        GraphExpandKind::Semantic => {
2472            expand_semantic(
2473                &tenant,
2474                &tenant_id_str,
2475                node_kind,
2476                &value,
2477                &node_id_full,
2478                limit,
2479            )
2480            .await
2481        }
2482    }
2483    .map(Json)
2484}
2485
2486// ---- cluster_member ----
2487
2488async fn expand_cluster_member(
2489    tenant: &TenantHandle,
2490    tenant_id: &str,
2491    node_kind: NodeKind,
2492    value: &str,
2493    node_id_full: &str,
2494    limit: i64,
2495) -> Result<GraphExpandResponse, ApiError> {
2496    match node_kind {
2497        NodeKind::Episode => {
2498            expand_cluster_member_from_episode(
2499                tenant,
2500                tenant_id,
2501                value.to_string(),
2502                node_id_full.to_string(),
2503                limit,
2504            )
2505            .await
2506        }
2507        NodeKind::Cluster => {
2508            expand_cluster_member_from_cluster(
2509                tenant,
2510                tenant_id,
2511                value.to_string(),
2512                node_id_full.to_string(),
2513                limit,
2514            )
2515            .await
2516        }
2517        _ => Err(ApiError::bad_request(format!(
2518            "kind=cluster_member only valid for episode or cluster source nodes; got {}",
2519            node_kind.as_wire_str()
2520        ))),
2521    }
2522}
2523
2524async fn expand_cluster_member_from_episode(
2525    tenant: &TenantHandle,
2526    tenant_id: &str,
2527    memory_id: String,
2528    node_id_full: String,
2529    limit: i64,
2530) -> Result<GraphExpandResponse, ApiError> {
2531    let memory_id_for_err = memory_id.clone();
2532    let rows: Vec<(String, Option<String>, i64)> = tenant
2533        .read()
2534        .interact(move |conn| {
2535            // First confirm the source episode exists in this tenant.
2536            let exists: i64 = conn.query_row(
2537                "SELECT COUNT(*) FROM episodes WHERE memory_id = ?1",
2538                rusqlite::params![&memory_id],
2539                |r| r.get(0),
2540            )?;
2541            if exists == 0 {
2542                return Ok(Vec::new());
2543            }
2544            let mut stmt = conn.prepare(
2545                "SELECT c.cluster_id, sa.content, c.created_at_ms
2546                   FROM cluster_episodes ce
2547                   JOIN clusters c ON c.cluster_id = ce.cluster_id
2548                   LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
2549                  WHERE ce.memory_id = ?1
2550                  ORDER BY c.created_at_ms DESC
2551                  LIMIT ?2",
2552            )?;
2553            let mapped = stmt
2554                .query_map(rusqlite::params![&memory_id, limit], |r| {
2555                    Ok((
2556                        r.get::<_, String>(0)?,
2557                        r.get::<_, Option<String>>(1)?,
2558                        r.get::<_, i64>(2)?,
2559                    ))
2560                })?
2561                .collect::<rusqlite::Result<Vec<_>>>()?;
2562            // Marker tuple to signal "episode found" via Vec emptiness +
2563            // an extra sentinel; we use a different shape:
2564            // pack the "found" flag via an out-of-band trick — actually
2565            // we re-query above. Keep it simple: confirm again here by
2566            // returning the rows; a missing episode short-circuits to
2567            // a 404 below via the `exists == 0` guard.
2568            Ok::<_, rusqlite::Error>(mapped)
2569        })
2570        .await
2571        .map_err(ApiError::from)?;
2572
2573    // The interact() returns Vec<(...)>; but we need to distinguish "no
2574    // such episode" (→ 404) from "episode exists, has no clusters" (→
2575    // 200 with empty arrays). Re-run a cheap existence check separately
2576    // — we already inlined it above and returned `Vec::new()` on miss,
2577    // but a real miss is indistinguishable from "episode in zero
2578    // clusters". Use a separate existence probe.
2579    if rows.is_empty() {
2580        ensure_episode_exists(tenant, &memory_id_for_err, &node_id_full).await?;
2581        return Ok(GraphExpandResponse {
2582            nodes: Vec::new(),
2583            edges: Vec::new(),
2584        });
2585    }
2586
2587    let mut nodes = Vec::with_capacity(rows.len());
2588    let mut edges = Vec::with_capacity(rows.len());
2589    for (cluster_id, abstraction, created_at_ms) in rows {
2590        let target_id = format!("cl:{cluster_id}");
2591        edges.push(GraphEdge {
2592            id: edge_id(&node_id_full, "cluster_member", &target_id),
2593            source: node_id_full.clone(),
2594            target: target_id,
2595            kind: "cluster_member",
2596            predicate: None,
2597            weight: None,
2598        });
2599        nodes.push(graph_node_for_cluster(
2600            tenant_id,
2601            &cluster_id,
2602            abstraction.as_deref(),
2603            created_at_ms,
2604        ));
2605    }
2606    Ok(GraphExpandResponse { nodes, edges })
2607}
2608
2609async fn expand_cluster_member_from_cluster(
2610    tenant: &TenantHandle,
2611    tenant_id: &str,
2612    cluster_id: String,
2613    node_id_full: String,
2614    limit: i64,
2615) -> Result<GraphExpandResponse, ApiError> {
2616    let cluster_id_for_err = cluster_id.clone();
2617    let rows: Vec<ExpandedEpisode> = tenant
2618        .read()
2619        .interact(move |conn| {
2620            let exists: i64 = conn.query_row(
2621                "SELECT COUNT(*) FROM clusters WHERE cluster_id = ?1",
2622                rusqlite::params![&cluster_id],
2623                |r| r.get(0),
2624            )?;
2625            if exists == 0 {
2626                return Ok(Vec::new());
2627            }
2628            let mut stmt = conn.prepare(
2629                "SELECT e.memory_id, e.ts_ms, e.content
2630                   FROM cluster_episodes ce
2631                   JOIN episodes e ON e.memory_id = ce.memory_id
2632                  WHERE ce.cluster_id = ?1
2633                    AND e.status = 'active'
2634                  ORDER BY e.ts_ms DESC
2635                  LIMIT ?2",
2636            )?;
2637            let mapped = stmt
2638                .query_map(rusqlite::params![&cluster_id, limit], |r| {
2639                    Ok(ExpandedEpisode {
2640                        memory_id: r.get(0)?,
2641                        ts_ms: r.get(1)?,
2642                        content: r.get(2)?,
2643                    })
2644                })?
2645                .collect::<rusqlite::Result<Vec<_>>>()?;
2646            Ok::<_, rusqlite::Error>(mapped)
2647        })
2648        .await
2649        .map_err(ApiError::from)?;
2650
2651    if rows.is_empty() {
2652        ensure_cluster_exists(tenant, &cluster_id_for_err, &node_id_full).await?;
2653        return Ok(GraphExpandResponse {
2654            nodes: Vec::new(),
2655            edges: Vec::new(),
2656        });
2657    }
2658
2659    let mut nodes = Vec::with_capacity(rows.len());
2660    let mut edges = Vec::with_capacity(rows.len());
2661    for ep in rows {
2662        let target_id = format!("ep:{}", ep.memory_id);
2663        edges.push(GraphEdge {
2664            id: edge_id(&node_id_full, "cluster_member", &target_id),
2665            source: node_id_full.clone(),
2666            target: target_id,
2667            kind: "cluster_member",
2668            predicate: None,
2669            weight: None,
2670        });
2671        nodes.push(graph_node_for_episode(tenant_id, &ep));
2672    }
2673    Ok(GraphExpandResponse { nodes, edges })
2674}
2675
2676// ---- document_chunk ----
2677
2678async fn expand_document_chunk(
2679    tenant: &TenantHandle,
2680    tenant_id: &str,
2681    node_kind: NodeKind,
2682    value: &str,
2683    node_id_full: &str,
2684    limit: i64,
2685) -> Result<GraphExpandResponse, ApiError> {
2686    match node_kind {
2687        NodeKind::Document => {
2688            expand_document_chunk_from_document(
2689                tenant,
2690                tenant_id,
2691                value.to_string(),
2692                node_id_full.to_string(),
2693                limit,
2694            )
2695            .await
2696        }
2697        NodeKind::Chunk => {
2698            expand_document_chunk_from_chunk(
2699                tenant,
2700                tenant_id,
2701                value.to_string(),
2702                node_id_full.to_string(),
2703            )
2704            .await
2705        }
2706        _ => Err(ApiError::bad_request(format!(
2707            "kind=document_chunk only valid for document or chunk source nodes; got {}",
2708            node_kind.as_wire_str()
2709        ))),
2710    }
2711}
2712
2713async fn expand_document_chunk_from_document(
2714    tenant: &TenantHandle,
2715    tenant_id: &str,
2716    doc_id: String,
2717    node_id_full: String,
2718    limit: i64,
2719) -> Result<GraphExpandResponse, ApiError> {
2720    let doc_id_for_err = doc_id.clone();
2721    let rows: Vec<ExpandedChunk> = tenant
2722        .read()
2723        .interact(move |conn| {
2724            let exists: i64 = conn.query_row(
2725                "SELECT COUNT(*) FROM documents WHERE doc_id = ?1",
2726                rusqlite::params![&doc_id],
2727                |r| r.get(0),
2728            )?;
2729            if exists == 0 {
2730                return Ok(Vec::new());
2731            }
2732            let mut stmt = conn.prepare(
2733                "SELECT chunk_id, chunk_index, content
2734                   FROM document_chunks
2735                  WHERE doc_id = ?1
2736                  ORDER BY chunk_index ASC
2737                  LIMIT ?2",
2738            )?;
2739            let mapped = stmt
2740                .query_map(rusqlite::params![&doc_id, limit], |r| {
2741                    Ok(ExpandedChunk {
2742                        chunk_id: r.get(0)?,
2743                        chunk_index: r.get(1)?,
2744                        content: r.get(2)?,
2745                    })
2746                })?
2747                .collect::<rusqlite::Result<Vec<_>>>()?;
2748            Ok::<_, rusqlite::Error>(mapped)
2749        })
2750        .await
2751        .map_err(ApiError::from)?;
2752
2753    if rows.is_empty() {
2754        ensure_document_exists(tenant, &doc_id_for_err, &node_id_full).await?;
2755        return Ok(GraphExpandResponse {
2756            nodes: Vec::new(),
2757            edges: Vec::new(),
2758        });
2759    }
2760
2761    let mut nodes = Vec::with_capacity(rows.len());
2762    let mut edges = Vec::with_capacity(rows.len());
2763    for c in rows {
2764        let target_id = format!("chunk:{}", c.chunk_id);
2765        edges.push(GraphEdge {
2766            id: edge_id(&node_id_full, "document_chunk", &target_id),
2767            source: node_id_full.clone(),
2768            target: target_id,
2769            kind: "document_chunk",
2770            predicate: None,
2771            weight: None,
2772        });
2773        nodes.push(graph_node_for_chunk(tenant_id, &c));
2774    }
2775    Ok(GraphExpandResponse { nodes, edges })
2776}
2777
2778async fn expand_document_chunk_from_chunk(
2779    tenant: &TenantHandle,
2780    tenant_id: &str,
2781    chunk_id: String,
2782    node_id_full: String,
2783) -> Result<GraphExpandResponse, ApiError> {
2784    let chunk_id_for_err = chunk_id.clone();
2785    let row: Option<ExpandedDocument> = tenant
2786        .read()
2787        .interact(move |conn| {
2788            conn.query_row(
2789                "SELECT d.doc_id, d.title, d.source, d.ingested_at_ms
2790                   FROM document_chunks c
2791                   JOIN documents d ON d.doc_id = c.doc_id
2792                  WHERE c.chunk_id = ?1",
2793                rusqlite::params![&chunk_id],
2794                |r| {
2795                    Ok(ExpandedDocument {
2796                        doc_id: r.get(0)?,
2797                        title: r.get(1)?,
2798                        source: r.get(2)?,
2799                        ingested_at_ms: r.get(3)?,
2800                    })
2801                },
2802            )
2803            .map(Some)
2804            .or_else(|e| match e {
2805                rusqlite::Error::QueryReturnedNoRows => Ok(None),
2806                other => Err(other),
2807            })
2808        })
2809        .await
2810        .map_err(ApiError::from)?;
2811
2812    let d = row.ok_or_else(|| {
2813        ApiError::not_found(format!(
2814            "node_id {node_id_full:?} (chunk_id {chunk_id_for_err}) not found in current tenant"
2815        ))
2816    })?;
2817    let target_id = format!("doc:{}", d.doc_id);
2818    let edge = GraphEdge {
2819        id: edge_id(&node_id_full, "document_chunk", &target_id),
2820        source: node_id_full.clone(),
2821        target: target_id,
2822        kind: "document_chunk",
2823        predicate: None,
2824        weight: None,
2825    };
2826    let node = graph_node_for_document(tenant_id, &d);
2827    Ok(GraphExpandResponse {
2828        nodes: vec![node],
2829        edges: vec![edge],
2830    })
2831}
2832
2833// ---- triple ----
2834
2835async fn expand_triple(
2836    tenant: &TenantHandle,
2837    tenant_id: &str,
2838    node_kind: NodeKind,
2839    value: &str,
2840    node_id_full: &str,
2841    limit: i64,
2842) -> Result<GraphExpandResponse, ApiError> {
2843    match node_kind {
2844        NodeKind::Episode => {
2845            expand_triple_from_episode(
2846                tenant,
2847                tenant_id,
2848                value.to_string(),
2849                node_id_full.to_string(),
2850                limit,
2851            )
2852            .await
2853        }
2854        NodeKind::Entity => {
2855            expand_triple_from_entity(
2856                tenant,
2857                tenant_id,
2858                value.to_string(),
2859                node_id_full.to_string(),
2860                limit,
2861            )
2862            .await
2863        }
2864        _ => Err(ApiError::bad_request(format!(
2865            "kind=triple only valid for episode or entity source nodes; got {}",
2866            node_kind.as_wire_str()
2867        ))),
2868    }
2869}
2870
/// One row read from the `triples` table while expanding an episode:
/// a subject/predicate/object statement plus its confidence.
#[derive(Debug)]
struct TripleRow {
    /// Value of the subject entity; becomes an `ent:<subject_id>` node id.
    subject_id: String,
    /// Relation name, surfaced as the edge's `predicate`.
    predicate: String,
    /// Value of the object entity; becomes an `ent:<object_id>` node id.
    object_id: String,
    /// Confidence stored with the triple, surfaced as the edge's `weight`.
    confidence: f32,
}
2878
2879async fn expand_triple_from_episode(
2880    tenant: &TenantHandle,
2881    tenant_id: &str,
2882    memory_id: String,
2883    node_id_full: String,
2884    limit: i64,
2885) -> Result<GraphExpandResponse, ApiError> {
2886    let memory_id_for_err = memory_id.clone();
2887    let rows: Vec<TripleRow> = tenant
2888        .read()
2889        .interact(move |conn| {
2890            // Episode rowid lookup (triples FK is INTEGER rowid, not memory_id).
2891            let rowid_opt: Option<i64> = conn
2892                .query_row(
2893                    "SELECT rowid FROM episodes WHERE memory_id = ?1",
2894                    rusqlite::params![&memory_id],
2895                    |r| r.get(0),
2896                )
2897                .map(Some)
2898                .or_else(|e| match e {
2899                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
2900                    other => Err(other),
2901                })?;
2902            let Some(rowid) = rowid_opt else {
2903                return Ok(Vec::new());
2904            };
2905            let mut stmt = conn.prepare(
2906                "SELECT subject_id, predicate, object_id, confidence
2907                   FROM triples
2908                  WHERE source_episode_id = ?1
2909                    AND status = 'active'
2910                  ORDER BY valid_from_ms DESC
2911                  LIMIT ?2",
2912            )?;
2913            let mapped = stmt
2914                .query_map(rusqlite::params![rowid, limit], |r| {
2915                    Ok(TripleRow {
2916                        subject_id: r.get(0)?,
2917                        predicate: r.get(1)?,
2918                        object_id: r.get(2)?,
2919                        confidence: r.get(3)?,
2920                    })
2921                })?
2922                .collect::<rusqlite::Result<Vec<_>>>()?;
2923            Ok::<_, rusqlite::Error>(mapped)
2924        })
2925        .await
2926        .map_err(ApiError::from)?;
2927
2928    if rows.is_empty() {
2929        ensure_episode_exists(tenant, &memory_id_for_err, &node_id_full).await?;
2930        return Ok(GraphExpandResponse {
2931            nodes: Vec::new(),
2932            edges: Vec::new(),
2933        });
2934    }
2935
2936    let mut nodes = Vec::new();
2937    let mut edges = Vec::new();
2938    let mut seen_entities: std::collections::HashSet<String> = Default::default();
2939    for t in rows {
2940        // Mint both endpoints as entity nodes. The source episode is
2941        // node_id_full; each triple becomes two edges (source→subj +
2942        // subj→obj) connected through the entity nodes, OR a single
2943        // edge labelled with the predicate from the source episode to
2944        // a representative entity. The TS schema treats `triple` as a
2945        // single edge with `predicate`; we emit one edge per triple:
2946        // source_episode → subject_entity (kind=triple, predicate=p),
2947        // plus one extra edge subject_entity → object_entity (also
2948        // kind=triple, same predicate) so a renderer can hop along the
2949        // SPO graph.
2950        let subj_id = format!("ent:{}", t.subject_id);
2951        let obj_id = format!("ent:{}", t.object_id);
2952        if seen_entities.insert(t.subject_id.clone()) {
2953            nodes.push(graph_node_for_entity(tenant_id, &t.subject_id));
2954        }
2955        if seen_entities.insert(t.object_id.clone()) {
2956            nodes.push(graph_node_for_entity(tenant_id, &t.object_id));
2957        }
2958        edges.push(GraphEdge {
2959            id: edge_id(&subj_id, "triple", &obj_id),
2960            source: subj_id,
2961            target: obj_id,
2962            kind: "triple",
2963            predicate: Some(t.predicate),
2964            weight: Some(t.confidence),
2965        });
2966    }
2967    Ok(GraphExpandResponse { nodes, edges })
2968}
2969
2970async fn expand_triple_from_entity(
2971    tenant: &TenantHandle,
2972    tenant_id: &str,
2973    entity_value: String,
2974    node_id_full: String,
2975    limit: i64,
2976) -> Result<GraphExpandResponse, ApiError> {
2977    // Entity nodes are synthetic — there's no existence check we can
2978    // run. "Unknown entity" naturally resolves to an empty result.
2979    let entity_q = entity_value.clone();
2980    let rows: Vec<ExpandedEpisode> = tenant
2981        .read()
2982        .interact(move |conn| {
2983            // Find episodes whose triples reference this entity on either
2984            // side. JOIN against episodes.rowid via triples.source_episode_id.
2985            let mut stmt = conn.prepare(
2986                "SELECT DISTINCT e.memory_id, e.ts_ms, e.content
2987                   FROM triples t
2988                   JOIN episodes e ON e.rowid = t.source_episode_id
2989                  WHERE (t.subject_id = ?1 OR t.object_id = ?1)
2990                    AND t.status = 'active'
2991                    AND t.source_episode_id IS NOT NULL
2992                    AND e.status = 'active'
2993                  ORDER BY e.ts_ms DESC
2994                  LIMIT ?2",
2995            )?;
2996            let mapped = stmt
2997                .query_map(rusqlite::params![&entity_q, limit], |r| {
2998                    Ok(ExpandedEpisode {
2999                        memory_id: r.get(0)?,
3000                        ts_ms: r.get(1)?,
3001                        content: r.get(2)?,
3002                    })
3003                })?
3004                .collect::<rusqlite::Result<Vec<_>>>()?;
3005            Ok::<_, rusqlite::Error>(mapped)
3006        })
3007        .await
3008        .map_err(ApiError::from)?;
3009
3010    // Empty result on entity expand is a valid 200 — the entity exists
3011    // only in the wire format; "no edges" is the right answer.
3012    let mut nodes = Vec::with_capacity(rows.len());
3013    let mut edges = Vec::with_capacity(rows.len());
3014    for ep in rows {
3015        let target_id = format!("ep:{}", ep.memory_id);
3016        edges.push(GraphEdge {
3017            id: edge_id(&node_id_full, "triple", &target_id),
3018            source: node_id_full.clone(),
3019            target: target_id,
3020            kind: "triple",
3021            predicate: None,
3022            weight: None,
3023        });
3024        nodes.push(graph_node_for_episode(tenant_id, &ep));
3025    }
3026    // Annotate _ to suppress unused (only used in match guard).
3027    let _ = entity_value;
3028    Ok(GraphExpandResponse { nodes, edges })
3029}
3030
3031// ---- semantic ----
3032
3033async fn expand_semantic(
3034    tenant: &TenantHandle,
3035    tenant_id: &str,
3036    node_kind: NodeKind,
3037    value: &str,
3038    node_id_full: &str,
3039    limit: i64,
3040) -> Result<GraphExpandResponse, ApiError> {
3041    if node_kind != NodeKind::Episode {
3042        return Err(ApiError::bad_request(format!(
3043            "kind=semantic only valid for episode source nodes; got {}",
3044            node_kind.as_wire_str()
3045        )));
3046    }
3047    let memory_id = value.to_string();
3048    let memory_id_q = memory_id.clone();
3049    // Fetch the source episode's content so we can re-embed it and call
3050    // the existing HNSW pipeline. Cheaper-than-extra-machinery: reuses
3051    // the well-tested `run_recall_inner` path that already filters
3052    // forgotten rows + decodes hnsw ids.
3053    let content: Option<String> = tenant
3054        .read()
3055        .interact(move |conn| {
3056            conn.query_row(
3057                "SELECT content FROM episodes WHERE memory_id = ?1 AND status = 'active'",
3058                rusqlite::params![&memory_id_q],
3059                |r| r.get::<_, String>(0),
3060            )
3061            .map(Some)
3062            .or_else(|e| match e {
3063                rusqlite::Error::QueryReturnedNoRows => Ok(None),
3064                other => Err(other),
3065            })
3066        })
3067        .await
3068        .map_err(ApiError::from)?;
3069
3070    let content = content.ok_or_else(|| {
3071        ApiError::not_found(format!(
3072            "node_id {node_id_full:?} (memory_id {memory_id}) not found in current tenant"
3073        ))
3074    })?;
3075
3076    // Pull one extra hit so we can drop self without losing user-requested
3077    // count. limit is already ≤ MAX_LIMIT; +1 stays within reason.
3078    let widened = (limit as usize).saturating_add(1).min(100);
3079    let result = solo_query::recall::run_recall_inner(
3080        tenant.embedder(),
3081        tenant.hnsw(),
3082        tenant.read(),
3083        &content,
3084        widened,
3085    )
3086    .await
3087    .map_err(ApiError::from)?;
3088
3089    let mut nodes = Vec::new();
3090    let mut edges = Vec::new();
3091    for hit in result.hits.into_iter() {
3092        if hit.memory_id == memory_id {
3093            // Skip self.
3094            continue;
3095        }
3096        if nodes.len() as i64 >= limit {
3097            break;
3098        }
3099        // The HNSW `cos_distance` is a distance (smaller = more similar).
3100        // Convert to a weight in [0, 1] (larger = more similar) for the
3101        // wire format: weight = (1 - distance).max(0).
3102        let weight = (1.0 - hit.cos_distance).max(0.0);
3103        let target_id = format!("ep:{}", hit.memory_id);
3104        edges.push(GraphEdge {
3105            id: edge_id(node_id_full, "semantic", &target_id),
3106            source: node_id_full.to_string(),
3107            target: target_id,
3108            kind: "semantic",
3109            predicate: None,
3110            weight: Some(weight),
3111        });
3112        nodes.push(GraphNode {
3113            id: format!("ep:{}", hit.memory_id),
3114            kind: NodeKind::Episode.as_wire_str(),
3115            label: episode_label(&hit.content),
3116            ts_ms: None,
3117            tenant_id: tenant_id.to_string(),
3118            preview: Some(truncate_preview(&hit.content, GRAPH_PREVIEW_CHARS)),
3119        });
3120    }
3121    Ok(GraphExpandResponse { nodes, edges })
3122}
3123
3124// ---- existence checks ----
3125
3126/// 404 if the memory_id has no row in this tenant's `episodes` table.
3127async fn ensure_episode_exists(
3128    tenant: &TenantHandle,
3129    memory_id: &str,
3130    node_id_full: &str,
3131) -> Result<(), ApiError> {
3132    let memory_id_q = memory_id.to_string();
3133    let exists: i64 = tenant
3134        .read()
3135        .interact(move |conn| {
3136            conn.query_row(
3137                "SELECT COUNT(*) FROM episodes WHERE memory_id = ?1",
3138                rusqlite::params![&memory_id_q],
3139                |r| r.get(0),
3140            )
3141        })
3142        .await
3143        .map_err(ApiError::from)?;
3144    if exists == 0 {
3145        return Err(ApiError::not_found(format!(
3146            "node_id {node_id_full:?} not found in current tenant"
3147        )));
3148    }
3149    Ok(())
3150}
3151
3152async fn ensure_cluster_exists(
3153    tenant: &TenantHandle,
3154    cluster_id: &str,
3155    node_id_full: &str,
3156) -> Result<(), ApiError> {
3157    let cluster_id_q = cluster_id.to_string();
3158    let exists: i64 = tenant
3159        .read()
3160        .interact(move |conn| {
3161            conn.query_row(
3162                "SELECT COUNT(*) FROM clusters WHERE cluster_id = ?1",
3163                rusqlite::params![&cluster_id_q],
3164                |r| r.get(0),
3165            )
3166        })
3167        .await
3168        .map_err(ApiError::from)?;
3169    if exists == 0 {
3170        return Err(ApiError::not_found(format!(
3171            "node_id {node_id_full:?} not found in current tenant"
3172        )));
3173    }
3174    Ok(())
3175}
3176
3177async fn ensure_document_exists(
3178    tenant: &TenantHandle,
3179    doc_id: &str,
3180    node_id_full: &str,
3181) -> Result<(), ApiError> {
3182    let doc_id_q = doc_id.to_string();
3183    let exists: i64 = tenant
3184        .read()
3185        .interact(move |conn| {
3186            conn.query_row(
3187                "SELECT COUNT(*) FROM documents WHERE doc_id = ?1",
3188                rusqlite::params![&doc_id_q],
3189                |r| r.get(0),
3190            )
3191        })
3192        .await
3193        .map_err(ApiError::from)?;
3194    if exists == 0 {
3195        return Err(ApiError::not_found(format!(
3196            "node_id {node_id_full:?} not found in current tenant"
3197        )));
3198    }
3199    Ok(())
3200}
3201
3202// ---------------------------------------------------------------------------
3203// Graph nodes + edges — paginated catalog reads (v0.10.0)
3204//
3205// `GET /v1/graph/nodes` and `GET /v1/graph/edges` are the bundle that
3206// powers solo-web's initial graph render. Both are read-only, both
3207// share the same tenant / auth / cursor scaffolding, both inherit the
3208// node-id prefix convention from `/v1/graph/expand` (ep:/doc:/chunk:/cl:/ent:).
3209//
3210// See `docs/dev-log/0114-graph-nodes-edges-impl.md` for the design
3211// notes (cursor format, entity scan strategy, semantic-edge rejection
3212// rationale, UNION pagination shape).
3213// ---------------------------------------------------------------------------
3214
// Page-size bounds for the graph catalog endpoints. The handlers that
// consume them are outside this section; names suggest the DEFAULT applies
// when `limit` is omitted and the MAX is the upper clamp — confirm there.
const GRAPH_NODES_DEFAULT_LIMIT: u32 = 100;
const GRAPH_NODES_MAX_LIMIT: u32 = 1_000;
const GRAPH_EDGES_DEFAULT_LIMIT: u32 = 200;
const GRAPH_EDGES_MAX_LIMIT: u32 = 2_000;

/// Upper bound on the number of entities produced by the entity scan.
const GRAPH_ENTITY_CAP: usize = 200;

/// Response header emitted when the entity scan reached
/// `GRAPH_ENTITY_CAP` and lower-frequency entities were left out of the
/// response. Lets clients surface an "entities truncated" hint without
/// parsing the body.
const ENTITY_CAP_HEADER: &str = "x-solo-entity-cap-reached";
3225
3226#[derive(Debug, Deserialize)]
3227struct GraphNodesQuery {
3228    /// Comma-separated kinds. Empty/missing = all five kinds. Repeated
3229    /// `?kind=` query params are NOT supported by axum's `Query<T>`
3230    /// extractor for `Option<String>` (it picks one) — comma-separated
3231    /// is documented + simpler. Values: episode|document|chunk|cluster|entity.
3232    #[serde(default)]
3233    kind: Option<String>,
3234    #[serde(default)]
3235    since_ms: Option<i64>,
3236    #[serde(default)]
3237    until_ms: Option<i64>,
3238    #[serde(default)]
3239    limit: Option<u32>,
3240    #[serde(default)]
3241    cursor: Option<String>,
3242}
3243
3244#[derive(Debug, Deserialize)]
3245struct GraphEdgesQuery {
3246    #[serde(default)]
3247    node_id: Option<String>,
3248    /// Comma-separated. Default = all kinds EXCEPT semantic.
3249    /// Values: triple|document_chunk|cluster_member|semantic.
3250    #[serde(default)]
3251    r#type: Option<String>,
3252    #[serde(default)]
3253    limit: Option<u32>,
3254    #[serde(default)]
3255    cursor: Option<String>,
3256}
3257
3258#[derive(Debug, Serialize)]
3259struct GraphNodesResponse {
3260    nodes: Vec<GraphNode>,
3261    #[serde(skip_serializing_if = "Option::is_none")]
3262    next_cursor: Option<String>,
3263}
3264
3265#[derive(Debug, Serialize)]
3266struct GraphEdgesResponse {
3267    edges: Vec<GraphEdge>,
3268    #[serde(skip_serializing_if = "Option::is_none")]
3269    next_cursor: Option<String>,
3270}
3271
3272/// Decode the `kind` filter from the query string. Returns the set of
3273/// kinds the caller wants (all five when filter absent / empty). 400 on
3274/// unknown kind.
3275fn parse_node_kind_filter(raw: Option<&str>) -> Result<Vec<NodeKind>, ApiError> {
3276    let raw = raw.unwrap_or("").trim();
3277    if raw.is_empty() {
3278        return Ok(vec![
3279            NodeKind::Episode,
3280            NodeKind::Document,
3281            NodeKind::Chunk,
3282            NodeKind::Cluster,
3283            NodeKind::Entity,
3284        ]);
3285    }
3286    let mut out = Vec::new();
3287    for token in raw.split(',') {
3288        let token = token.trim();
3289        if token.is_empty() {
3290            continue;
3291        }
3292        let kind = match token {
3293            "episode" => NodeKind::Episode,
3294            "document" => NodeKind::Document,
3295            "chunk" => NodeKind::Chunk,
3296            "cluster" => NodeKind::Cluster,
3297            "entity" => NodeKind::Entity,
3298            other => {
3299                return Err(ApiError::bad_request(format!(
3300                    "unknown node kind {other:?}; expected one of episode/document/chunk/cluster/entity"
3301                )));
3302            }
3303        };
3304        if !out.contains(&kind) {
3305            out.push(kind);
3306        }
3307    }
3308    if out.is_empty() {
3309        return Err(ApiError::bad_request(
3310            "kind filter is empty after parsing; either omit or list at least one kind",
3311        ));
3312    }
3313    Ok(out)
3314}
3315
/// Edge-kind discriminator on `/v1/graph/edges`.
///
/// The explicit discriminants double as the pagination sort rank (see
/// [`EdgeKind::order_idx`]), so they must stay stable once cursors have
/// been handed out.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum EdgeKind {
    Triple = 0,
    DocumentChunk = 1,
    ClusterMember = 2,
}

impl EdgeKind {
    /// Rank of this kind in the edge pagination order; lower ranks are
    /// emitted first.
    fn order_idx(self) -> u8 {
        // Discriminants are declared explicitly above and all fit in a u8.
        self as u8
    }
}
3334
3335fn parse_edge_kind_filter(raw: Option<&str>) -> Result<Vec<EdgeKind>, ApiError> {
3336    let raw = raw.unwrap_or("").trim();
3337    if raw.is_empty() {
3338        // Default = all three concrete kinds; semantic is opt-in via
3339        // /v1/graph/neighbors/:id (per scoping doc §3 Decision B).
3340        return Ok(vec![
3341            EdgeKind::Triple,
3342            EdgeKind::DocumentChunk,
3343            EdgeKind::ClusterMember,
3344        ]);
3345    }
3346    let mut out = Vec::new();
3347    for token in raw.split(',') {
3348        let token = token.trim();
3349        if token.is_empty() {
3350            continue;
3351        }
3352        let kind = match token {
3353            "triple" => EdgeKind::Triple,
3354            "document_chunk" => EdgeKind::DocumentChunk,
3355            "cluster_member" => EdgeKind::ClusterMember,
3356            "semantic" => {
3357                // semantic edges aren't precomputed; they're HNSW queries
3358                // at request time. Wrong endpoint.
3359                return Err(ApiError::bad_request(
3360                    "semantic edges are available via /v1/graph/neighbors/:id?kind=semantic, not /v1/graph/edges (semantic edges aren't precomputed; they're query-time HNSW lookups)",
3361                ));
3362            }
3363            other => {
3364                return Err(ApiError::bad_request(format!(
3365                    "unknown edge type {other:?}; expected one of triple/document_chunk/cluster_member"
3366                )));
3367            }
3368        };
3369        if !out.contains(&kind) {
3370            out.push(kind);
3371        }
3372    }
3373    if out.is_empty() {
3374        return Err(ApiError::bad_request(
3375            "type filter is empty after parsing; either omit or list at least one type",
3376        ));
3377    }
3378    Ok(out)
3379}
3380
/// Opaque cursor for `/v1/graph/nodes`. Encodes the sort key of the last
/// item on the previous page. Under the canonical sort `ts_ms DESC, id
/// ASC`, a row belongs to a later page when
/// `ts_ms < cursor.ts_ms OR (ts_ms = cursor.ts_ms AND id > cursor.id)`.
///
/// Travels on the wire as URL-safe, unpadded base64 over the JSON
/// encoding of this struct (see `encode_cursor` / `decode_cursor`).
#[derive(Debug, Serialize, Deserialize)]
struct NodesCursor {
    /// Sort timestamp of the last emitted node, in milliseconds.
    ts_ms: i64,
    /// Kind-prefixed node id of the last emitted node (the handler builds
    /// these as `ep:…`, `doc:…`, `chunk:…`, `cl:…`, `ent:…`).
    id: String,
}
3389
/// Opaque cursor for `/v1/graph/edges`. Encodes the last item's
/// `(kind_idx, sub_id)` so the next page resumes at `> cursor` under
/// sort `(kind_idx ASC, sub_id ASC)`. `sub_id` is the per-kind stable
/// row id (triple_id for triples, chunk_id for document_chunk, the
/// composite `cluster_id||memory_id` string for cluster_member).
///
/// Travels on the wire as URL-safe, unpadded base64 over the JSON
/// encoding of this struct (see `encode_cursor` / `decode_cursor`).
#[derive(Debug, Serialize, Deserialize)]
struct EdgesCursor {
    /// Sort rank of the last emitted edge's kind — presumably
    /// `EdgeKind::order_idx`; confirm where the handler stages edges.
    kind_idx: u8,
    /// Per-kind row identifier of the last emitted edge.
    sub_id: String,
}
3400
3401fn encode_cursor<T: Serialize>(value: &T) -> Result<String, ApiError> {
3402    use base64::Engine;
3403    let json = serde_json::to_vec(value)
3404        .map_err(|e| ApiError::internal(format!("cursor serialize: {e}")))?;
3405    Ok(base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(json))
3406}
3407
3408fn decode_cursor<T: for<'de> Deserialize<'de>>(raw: &str) -> Result<T, ApiError> {
3409    use base64::Engine;
3410    let bytes = base64::engine::general_purpose::URL_SAFE_NO_PAD
3411        .decode(raw.as_bytes())
3412        .map_err(|e| ApiError::bad_request(format!("cursor: bad base64: {e}")))?;
3413    serde_json::from_slice::<T>(&bytes)
3414        .map_err(|e| ApiError::bad_request(format!("cursor: bad JSON payload: {e}")))
3415}
3416
/// Internal staging row for the nodes endpoint. Carries the GraphNode
/// plus the sort key so we can merge all kinds before applying the
/// pagination cut.
#[derive(Debug)]
struct StagingNode {
    /// The wire-format node that ends up in the response.
    node: GraphNode,
    /// Primary sort key (descending). Kept separately from `node.ts_ms`
    /// so sorting does not depend on what the wire format exposes.
    sort_ts_ms: i64,
    /// Tie-break sort key (ascending): the kind-prefixed node id.
    sort_id: String,
}
3426
/// Compare two `(ts_ms, id)` keys under the canonical node order:
/// newest timestamp first, then id ascending as a deterministic
/// tie-break.
fn cmp_node_sort_keys(a: (i64, &str), b: (i64, &str)) -> std::cmp::Ordering {
    let (a_ts, a_id) = a;
    let (b_ts, b_id) = b;
    // Operands are swapped for the timestamp to get descending order.
    b_ts.cmp(&a_ts).then_with(|| a_id.cmp(b_id))
}
3436
3437/// True if `(ts_ms, id)` strictly comes AFTER `cursor` under the canonical
3438/// sort `ts_ms DESC, id ASC` — i.e. is admissible into a page following
3439/// the cursor.
3440fn node_passes_cursor(ts_ms: i64, id: &str, cursor: &NodesCursor) -> bool {
3441    cmp_node_sort_keys((ts_ms, id), (cursor.ts_ms, cursor.id.as_str()))
3442        == std::cmp::Ordering::Greater
3443}
3444
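// --- Per-kind row fetchers. Each runs a bounded query and applies the
//     time filter. The episode, document, chunk and cluster fetchers
//     return rows already sorted `ts_ms DESC, id ASC`; the entity fetcher
//     instead orders by `ref_count DESC` so its cap keeps the
//     most-referenced entities.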
3447
/// One row of the `episodes` table as read by `fetch_episodes_for_nodes`.
#[derive(Debug)]
struct NodeRowEp {
    /// `episodes.memory_id`; the handler exposes it as node id `ep:<memory_id>`.
    memory_id: String,
    /// `episodes.ts_ms`; used as the node's sort timestamp.
    ts_ms: i64,
    /// `episodes.content`.
    content: String,
}
3454
3455fn fetch_episodes_for_nodes(
3456    conn: &rusqlite::Connection,
3457    since_ms: Option<i64>,
3458    until_ms: Option<i64>,
3459    cursor: Option<&NodesCursor>,
3460    limit: i64,
3461) -> rusqlite::Result<Vec<NodeRowEp>> {
3462    let mut sql = String::from(
3463        "SELECT memory_id, ts_ms, content
3464           FROM episodes
3465          WHERE status = 'active'",
3466    );
3467    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3468    if let Some(s) = since_ms {
3469        sql.push_str(" AND ts_ms >= ?");
3470        params.push(s.into());
3471    }
3472    if let Some(u) = until_ms {
3473        sql.push_str(" AND ts_ms <= ?");
3474        params.push(u.into());
3475    }
3476    // Cursor pre-filter: under sort `ts_ms DESC, prefixed_id ASC`,
3477    // anything strictly newer than the cursor's ts_ms is in a previous
3478    // page; rows with equal ts_ms may or may not be (depends on the
3479    // cross-kind ordering). The post-merge step applies the full
3480    // `(ts_ms, prefixed_id)` comparison; here we just discard rows
3481    // that can't possibly survive.
3482    if let Some(cur) = cursor {
3483        sql.push_str(" AND ts_ms <= ?");
3484        params.push(cur.ts_ms.into());
3485    }
3486    sql.push_str(" ORDER BY ts_ms DESC, memory_id ASC LIMIT ?");
3487    params.push(limit.into());
3488    let mut stmt = conn.prepare(&sql)?;
3489    let rows: Vec<NodeRowEp> = stmt
3490        .query_map(rusqlite::params_from_iter(params), |r| {
3491            Ok(NodeRowEp {
3492                memory_id: r.get(0)?,
3493                ts_ms: r.get(1)?,
3494                content: r.get(2)?,
3495            })
3496        })?
3497        .collect::<rusqlite::Result<Vec<_>>>()?;
3498    Ok(rows)
3499}
3500
/// One row of the `documents` table as read by `fetch_documents_for_nodes`.
#[derive(Debug)]
struct NodeRowDoc {
    /// `documents.doc_id`; the handler exposes it as node id `doc:<doc_id>`.
    doc_id: String,
    /// `documents.title`; nullable.
    title: Option<String>,
    /// `documents.source`; nullable.
    source: Option<String>,
    /// `documents.ingested_at_ms`; used as the node's sort timestamp.
    ingested_at_ms: i64,
}
3508
3509fn fetch_documents_for_nodes(
3510    conn: &rusqlite::Connection,
3511    since_ms: Option<i64>,
3512    until_ms: Option<i64>,
3513    cursor: Option<&NodesCursor>,
3514    limit: i64,
3515) -> rusqlite::Result<Vec<NodeRowDoc>> {
3516    let mut sql = String::from(
3517        "SELECT doc_id, title, source, ingested_at_ms
3518           FROM documents
3519          WHERE status = 'active'",
3520    );
3521    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3522    if let Some(s) = since_ms {
3523        sql.push_str(" AND ingested_at_ms >= ?");
3524        params.push(s.into());
3525    }
3526    if let Some(u) = until_ms {
3527        sql.push_str(" AND ingested_at_ms <= ?");
3528        params.push(u.into());
3529    }
3530    if let Some(cur) = cursor {
3531        sql.push_str(" AND ingested_at_ms <= ?");
3532        params.push(cur.ts_ms.into());
3533    }
3534    sql.push_str(" ORDER BY ingested_at_ms DESC, doc_id ASC LIMIT ?");
3535    params.push(limit.into());
3536    let mut stmt = conn.prepare(&sql)?;
3537    let rows: Vec<NodeRowDoc> = stmt
3538        .query_map(rusqlite::params_from_iter(params), |r| {
3539            Ok(NodeRowDoc {
3540                doc_id: r.get(0)?,
3541                title: r.get(1)?,
3542                source: r.get(2)?,
3543                ingested_at_ms: r.get(3)?,
3544            })
3545        })?
3546        .collect::<rusqlite::Result<Vec<_>>>()?;
3547    Ok(rows)
3548}
3549
/// One row of `document_chunks` as read by `fetch_chunks_for_nodes`.
#[derive(Debug)]
struct NodeRowChunk {
    /// `document_chunks.chunk_id`; the handler exposes it as node id
    /// `chunk:<chunk_id>`.
    chunk_id: String,
    /// `document_chunks.chunk_index`.
    chunk_index: i64,
    /// `document_chunks.content`.
    content: String,
    /// `document_chunks.created_at_ms`; used as the node's sort timestamp.
    created_at_ms: i64,
}
3557
3558fn fetch_chunks_for_nodes(
3559    conn: &rusqlite::Connection,
3560    since_ms: Option<i64>,
3561    until_ms: Option<i64>,
3562    cursor: Option<&NodesCursor>,
3563    limit: i64,
3564) -> rusqlite::Result<Vec<NodeRowChunk>> {
3565    // Filter by `document_chunks.created_at_ms`; chunks of forgotten
3566    // documents are filtered out by the join on `documents.status`.
3567    let mut sql = String::from(
3568        "SELECT c.chunk_id, c.chunk_index, c.content, c.created_at_ms
3569           FROM document_chunks c
3570           JOIN documents d ON d.doc_id = c.doc_id
3571          WHERE d.status = 'active'",
3572    );
3573    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3574    if let Some(s) = since_ms {
3575        sql.push_str(" AND c.created_at_ms >= ?");
3576        params.push(s.into());
3577    }
3578    if let Some(u) = until_ms {
3579        sql.push_str(" AND c.created_at_ms <= ?");
3580        params.push(u.into());
3581    }
3582    if let Some(cur) = cursor {
3583        sql.push_str(" AND c.created_at_ms <= ?");
3584        params.push(cur.ts_ms.into());
3585    }
3586    sql.push_str(" ORDER BY c.created_at_ms DESC, c.chunk_id ASC LIMIT ?");
3587    params.push(limit.into());
3588    let mut stmt = conn.prepare(&sql)?;
3589    let rows: Vec<NodeRowChunk> = stmt
3590        .query_map(rusqlite::params_from_iter(params), |r| {
3591            Ok(NodeRowChunk {
3592                chunk_id: r.get(0)?,
3593                chunk_index: r.get(1)?,
3594                content: r.get(2)?,
3595                created_at_ms: r.get(3)?,
3596            })
3597        })?
3598        .collect::<rusqlite::Result<Vec<_>>>()?;
3599    Ok(rows)
3600}
3601
/// One cluster row as read by `fetch_clusters_for_nodes`.
#[derive(Debug)]
struct NodeRowCluster {
    /// `clusters.cluster_id`; the handler exposes it as node id
    /// `cl:<cluster_id>`.
    cluster_id: String,
    /// `semantic_abstractions.content` for this cluster, or `None` when
    /// the LEFT JOIN finds no abstraction row (or the content is NULL).
    abstraction: Option<String>,
    /// `clusters.created_at_ms`; used as the node's sort timestamp.
    created_at_ms: i64,
}
3608
3609fn fetch_clusters_for_nodes(
3610    conn: &rusqlite::Connection,
3611    since_ms: Option<i64>,
3612    until_ms: Option<i64>,
3613    cursor: Option<&NodesCursor>,
3614    limit: i64,
3615) -> rusqlite::Result<Vec<NodeRowCluster>> {
3616    // clusters has no `status` column; LEFT JOIN abstractions for the
3617    // optional label.
3618    let mut sql = String::from(
3619        "SELECT c.cluster_id, sa.content, c.created_at_ms
3620           FROM clusters c
3621           LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
3622          WHERE 1=1",
3623    );
3624    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3625    if let Some(s) = since_ms {
3626        sql.push_str(" AND c.created_at_ms >= ?");
3627        params.push(s.into());
3628    }
3629    if let Some(u) = until_ms {
3630        sql.push_str(" AND c.created_at_ms <= ?");
3631        params.push(u.into());
3632    }
3633    if let Some(cur) = cursor {
3634        sql.push_str(" AND c.created_at_ms <= ?");
3635        params.push(cur.ts_ms.into());
3636    }
3637    sql.push_str(" ORDER BY c.created_at_ms DESC, c.cluster_id ASC LIMIT ?");
3638    params.push(limit.into());
3639    let mut stmt = conn.prepare(&sql)?;
3640    let rows: Vec<NodeRowCluster> = stmt
3641        .query_map(rusqlite::params_from_iter(params), |r| {
3642            Ok(NodeRowCluster {
3643                cluster_id: r.get(0)?,
3644                abstraction: r.get(1)?,
3645                created_at_ms: r.get(2)?,
3646            })
3647        })?
3648        .collect::<rusqlite::Result<Vec<_>>>()?;
3649    Ok(rows)
3650}
3651
/// One synthesized entity as aggregated by `fetch_entities_for_nodes`.
#[derive(Debug)]
struct NodeRowEntity {
    /// The entity value: a `subject_id` or `object_id` taken from active
    /// triples. The handler exposes it as node id `ent:<value>`.
    value: String,
    /// Number of subject/object references to this value among the
    /// triples that passed the time filter.
    ref_count: i64,
    /// Smallest `valid_from_ms` among those references; used as the
    /// node's sort timestamp.
    first_seen_ms: i64,
}
3658
3659/// Synthesize entity nodes from the triples table. Caps result at
3660/// `GRAPH_ENTITY_CAP`, ordered by `ref_count DESC` so the loudest
3661/// entities make the cut. Returns (rows, cap_reached).
3662///
3663/// **Cost**: this is O(N) over active triples per request. For tenants
3664/// with >100k triples this can be noticeable; v0.10.x can cache the
3665/// rollup if profiling justifies it. The 200-row cap keeps the wire
3666/// payload bounded regardless.
3667fn fetch_entities_for_nodes(
3668    conn: &rusqlite::Connection,
3669    since_ms: Option<i64>,
3670    until_ms: Option<i64>,
3671    cursor: Option<&NodesCursor>,
3672) -> rusqlite::Result<(Vec<NodeRowEntity>, bool)> {
3673    // Pull subject + object columns, group by value, compute count + min
3674    // ts_ms. UNION ALL the two columns into a single aggregation. Apply
3675    // time filter against `valid_from_ms` (the closest analogue to "when
3676    // was this entity first referenced").
3677    let mut sql = String::from(
3678        "WITH all_refs AS (
3679            SELECT subject_id AS value, valid_from_ms AS ts_ms FROM triples WHERE status = 'active'
3680            UNION ALL
3681            SELECT object_id  AS value, valid_from_ms AS ts_ms FROM triples WHERE status = 'active'
3682         )
3683         SELECT value, COUNT(*) AS ref_count, MIN(ts_ms) AS first_seen_ms
3684           FROM all_refs
3685          WHERE 1=1",
3686    );
3687    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3688    if let Some(s) = since_ms {
3689        sql.push_str(" AND ts_ms >= ?");
3690        params.push(s.into());
3691    }
3692    if let Some(u) = until_ms {
3693        sql.push_str(" AND ts_ms <= ?");
3694        params.push(u.into());
3695    }
3696    // Cursor: drop entities whose first_seen_ms strictly newer than the
3697    // cursor. We can't predicate on COUNT() until after GROUP BY, so the
3698    // cap-applicable filter sits in the HAVING clause.
3699    sql.push_str(" GROUP BY value");
3700    if let Some(ts) = cursor.map(|c| c.ts_ms) {
3701        sql.push_str(" HAVING MIN(ts_ms) <= ?");
3702        params.push(ts.into());
3703    }
3704    // Over-fetch by one to detect "cap reached".
3705    let want = GRAPH_ENTITY_CAP as i64 + 1;
3706    sql.push_str(" ORDER BY ref_count DESC, value ASC LIMIT ?");
3707    params.push(want.into());
3708    let mut stmt = conn.prepare(&sql)?;
3709    let rows: Vec<NodeRowEntity> = stmt
3710        .query_map(rusqlite::params_from_iter(params), |r| {
3711            Ok(NodeRowEntity {
3712                value: r.get(0)?,
3713                ref_count: r.get(1)?,
3714                first_seen_ms: r.get(2)?,
3715            })
3716        })?
3717        .collect::<rusqlite::Result<Vec<_>>>()?;
3718    let cap_reached = rows.len() > GRAPH_ENTITY_CAP;
3719    let mut trimmed = rows;
3720    if cap_reached {
3721        trimmed.truncate(GRAPH_ENTITY_CAP);
3722    }
3723    Ok((trimmed, cap_reached))
3724}
3725
/// `GET /v1/graph/nodes`. Paginated node catalog across the tenant.
/// See module-level comments for the contract.
///
/// Steps, as implemented below:
///   1. Resolve `limit` (default `GRAPH_NODES_DEFAULT_LIMIT`, clamped to
///      `1..=GRAPH_NODES_MAX_LIMIT`), the kind filter, the time window and
///      the optional cursor. Malformed input is rejected with a
///      bad-request `ApiError`.
///   2. On the tenant's read connection, run one bounded fetch per
///      requested kind and stage every row together with its
///      `(ts_ms, kind-prefixed id)` sort key.
///   3. Drop rows that do not sort strictly after the cursor, sort
///      `ts_ms DESC, id ASC`, cut to `limit`, and emit `next_cursor` when
///      rows remain beyond the cut.
///   4. Set the `ENTITY_CAP_HEADER` response header to `true` when the
///      entity fetch reported that its cap was reached.
async fn graph_nodes_handler(
    TenantExtractor(tenant): TenantExtractor,
    Query(q): Query<GraphNodesQuery>,
) -> Result<Response, ApiError> {
    let limit = q.limit.unwrap_or(GRAPH_NODES_DEFAULT_LIMIT);
    // Clamping to at least 1 also keeps `limit_us - 1` below from underflowing.
    let limit = limit.clamp(1, GRAPH_NODES_MAX_LIMIT);
    let kinds = parse_node_kind_filter(q.kind.as_deref())?;
    let since_ms = q.since_ms;
    let until_ms = q.until_ms;
    // An inverted window can never match anything; reject it explicitly
    // rather than silently returning an empty page.
    if let (Some(s), Some(u)) = (since_ms, until_ms) {
        if s > u {
            return Err(ApiError::bad_request(format!(
                "since_ms ({s}) must be <= until_ms ({u})"
            )));
        }
    }
    // A missing or empty `cursor` means "first page".
    let cursor = match q.cursor.as_deref() {
        None => None,
        Some("") => None,
        Some(raw) => Some(decode_cursor::<NodesCursor>(raw)?),
    };
    let want_episode = kinds.contains(&NodeKind::Episode);
    let want_document = kinds.contains(&NodeKind::Document);
    let want_chunk = kinds.contains(&NodeKind::Chunk);
    let want_cluster = kinds.contains(&NodeKind::Cluster);
    let want_entity = kinds.contains(&NodeKind::Entity);

    // Over-fetch `limit + 2` per kind:
    //   * `+1` so the merge step can detect "more rows available beyond
    //     this page" → emits a `next_cursor` instead of None.
    //   * `+1` again because the SQL pre-filter `ts_ms <= cursor.ts_ms`
    //     can pull the previous page's last item back in; the post-merge
    //     cursor predicate drops it, costing one row of headroom.
    // The entity cap stays at GRAPH_ENTITY_CAP — entities are bounded
    // independently by the response cap, not the page limit.
    // NOTE(review): the single row of headroom assumes each per-kind
    // fetch returns at most one row that the cursor filter below will
    // discard — confirm the fetchers guarantee that when many rows share
    // one timestamp.
    let per_kind_limit = (limit as i64).saturating_add(2);
    // Owned copies for the blocking closure, which is `move` and so
    // cannot borrow from this stack frame.
    let tenant_id_for_blocking = tenant.tenant_id().to_string();
    // `NodesCursor` does not derive `Clone`, hence the field-by-field copy.
    let cursor_clone = cursor.as_ref().map(|c| NodesCursor {
        ts_ms: c.ts_ms,
        id: c.id.clone(),
    });

    let (mut staged, cap_reached) = tenant
        .read()
        .interact(move |conn| {
            let mut staged: Vec<StagingNode> = Vec::new();
            let mut cap_reached = false;
            let cursor_ref = cursor_clone.as_ref();

            // Each section below fetches one kind and stages its rows.
            // `sort_id` is the kind-prefixed id (`ep:`, `doc:`, `chunk:`,
            // `cl:`, `ent:`) and serves as the tie-break sort key.
            if want_episode {
                let eps =
                    fetch_episodes_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
                for ep in eps {
                    let id = format!("ep:{}", ep.memory_id);
                    let exp = ExpandedEpisode {
                        memory_id: ep.memory_id,
                        ts_ms: ep.ts_ms,
                        content: ep.content,
                    };
                    let node = graph_node_for_episode(&tenant_id_for_blocking, &exp);
                    staged.push(StagingNode {
                        sort_ts_ms: ep.ts_ms,
                        sort_id: id.clone(),
                        node,
                    });
                }
            }
            if want_document {
                let docs = fetch_documents_for_nodes(
                    conn,
                    since_ms,
                    until_ms,
                    cursor_ref,
                    per_kind_limit,
                )?;
                for d in docs {
                    let id = format!("doc:{}", d.doc_id);
                    let exp = ExpandedDocument {
                        doc_id: d.doc_id,
                        title: d.title,
                        source: d.source,
                        ingested_at_ms: d.ingested_at_ms,
                    };
                    let node = graph_node_for_document(&tenant_id_for_blocking, &exp);
                    staged.push(StagingNode {
                        sort_ts_ms: d.ingested_at_ms,
                        sort_id: id.clone(),
                        node,
                    });
                }
            }
            if want_chunk {
                let chunks =
                    fetch_chunks_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
                for c in chunks {
                    let id = format!("chunk:{}", c.chunk_id);
                    let exp = ExpandedChunk {
                        chunk_id: c.chunk_id,
                        chunk_index: c.chunk_index,
                        content: c.content,
                    };
                    // graph_node_for_chunk sets ts_ms = None for the
                    // wire format (chunks don't have a natural user-
                    // facing timestamp); but for sorting we use the
                    // row's created_at_ms.
                    let mut node = graph_node_for_chunk(&tenant_id_for_blocking, &exp);
                    // On this endpoint the chunk's timestamp is filled in,
                    // so the emitted node shows the value it was sorted by.
                    node.ts_ms = Some(c.created_at_ms);
                    staged.push(StagingNode {
                        sort_ts_ms: c.created_at_ms,
                        sort_id: id.clone(),
                        node,
                    });
                }
            }
            if want_cluster {
                let cls =
                    fetch_clusters_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
                for c in cls {
                    let id = format!("cl:{}", c.cluster_id);
                    let node = graph_node_for_cluster(
                        &tenant_id_for_blocking,
                        &c.cluster_id,
                        c.abstraction.as_deref(),
                        c.created_at_ms,
                    );
                    staged.push(StagingNode {
                        sort_ts_ms: c.created_at_ms,
                        sort_id: id.clone(),
                        node,
                    });
                }
            }
            if want_entity {
                // Entities are bounded by their own cap rather than by
                // `per_kind_limit`; the fetcher reports whether it was hit.
                let (ents, was_cap_reached) =
                    fetch_entities_for_nodes(conn, since_ms, until_ms, cursor_ref)?;
                cap_reached = was_cap_reached;
                for e in ents {
                    let id = format!("ent:{}", e.value);
                    let mut node = graph_node_for_entity(&tenant_id_for_blocking, &e.value);
                    // Overwrite timestamp and preview with the aggregates
                    // computed by the entity query.
                    node.ts_ms = Some(e.first_seen_ms);
                    node.preview = Some(format!("Referenced in {} triples", e.ref_count));
                    staged.push(StagingNode {
                        sort_ts_ms: e.first_seen_ms,
                        sort_id: id.clone(),
                        node,
                    });
                }
            }
            Ok::<_, rusqlite::Error>((staged, cap_reached))
        })
        .await
        .map_err(ApiError::from)?;

    // Apply cursor filter: keep only rows that sort strictly after the
    // cursor under `ts_ms DESC, id ASC`. The SQL pre-filters are coarser,
    // so this is the authoritative check.
    if let Some(cur) = &cursor {
        staged.retain(|s| node_passes_cursor(s.sort_ts_ms, &s.sort_id, cur));
    }

    // Sort `ts_ms DESC, id ASC`.
    staged
        .sort_by(|a, b| cmp_node_sort_keys((a.sort_ts_ms, &a.sort_id), (b.sort_ts_ms, &b.sort_id)));

    // Apply page limit + compute next_cursor. The cursor is the sort key
    // of the last row that stays on this page, and is only emitted when
    // at least one staged row lies beyond the cut.
    let limit_us = limit as usize;
    let next_cursor = if staged.len() > limit_us {
        let last = &staged[limit_us - 1];
        Some(NodesCursor {
            ts_ms: last.sort_ts_ms,
            id: last.sort_id.clone(),
        })
    } else {
        None
    };
    staged.truncate(limit_us);

    let next_cursor_str = match next_cursor {
        Some(c) => Some(encode_cursor(&c)?),
        None => None,
    };

    // Strip the sort keys; only the wire-format nodes go into the body.
    let nodes: Vec<GraphNode> = staged.into_iter().map(|s| s.node).collect();
    let payload = GraphNodesResponse {
        nodes,
        next_cursor: next_cursor_str,
    };

    // Attach the entity-cap header so clients can show truncation UX
    // without parsing the body.
    let mut response = Json(payload).into_response();
    if cap_reached {
        response
            .headers_mut()
            .insert(ENTITY_CAP_HEADER, HeaderValue::from_static("true"));
    }
    Ok(response)
}
3924
3925// --- /v1/graph/edges --------------------------------------------------
3926
/// Internal staging row for the edges endpoint: the wire-format edge
/// plus the `(kind_idx, sub_id)` key used by `cmp_edge_sort_keys` and
/// `edge_passes_cursor`.
#[derive(Debug)]
struct StagingEdge {
    /// The wire-format edge.
    edge: GraphEdge,
    /// Primary sort key (ascending) — presumably `EdgeKind::order_idx`
    /// of the edge's kind; confirm where the handler stages edges.
    kind_idx: u8,
    /// Secondary sort key (ascending): the per-kind row identifier.
    sub_id: String,
}
3933
/// Compare two `(kind_idx, sub_id)` edge keys: `kind_idx` ascending
/// first, then `sub_id` ascending (byte-wise string order) as the
/// tie-break.
fn cmp_edge_sort_keys(a: (u8, &str), b: (u8, &str)) -> std::cmp::Ordering {
    // Tuple `Ord` is lexicographic, which is exactly the order wanted here.
    a.cmp(&b)
}
3940
3941fn edge_passes_cursor(kind_idx: u8, sub_id: &str, cursor: &EdgesCursor) -> bool {
3942    cmp_edge_sort_keys(
3943        (kind_idx, sub_id),
3944        (cursor.kind_idx, cursor.sub_id.as_str()),
3945    ) == std::cmp::Ordering::Greater
3946}
3947
3948/// Whether the supplied focus `node_id` (kind, value) matches an edge's
3949/// (source, target) endpoint pair under a given edge kind. Used to
3950/// filter `?node_id=...` queries.
3951fn edge_touches_focus(
3952    kind: EdgeKind,
3953    focus_kind: NodeKind,
3954    focus_value: &str,
3955    src_value: &str,
3956    tgt_value: &str,
3957    extra_value: Option<&str>,
3958) -> bool {
3959    // Determine which endpoint kinds this edge family produces; if the
3960    // focus kind isn't compatible, no match.
3961    match kind {
3962        EdgeKind::Triple => match focus_kind {
3963            // Triple edges flow source_episode → ent:<object_id>. We
3964            // also expose subject/object entities as endpoints (see
3965            // emit_triple_edges_for_focus); the matching here covers
3966            // episode focus + entity focus + the symmetric pair.
3967            NodeKind::Episode => src_value == focus_value,
3968            NodeKind::Entity => {
3969                tgt_value == focus_value
3970                    || extra_value.map(|x| x == focus_value).unwrap_or(false)
3971                    || src_value == focus_value
3972            }
3973            _ => false,
3974        },
3975        EdgeKind::DocumentChunk => match focus_kind {
3976            NodeKind::Document => src_value == focus_value,
3977            NodeKind::Chunk => tgt_value == focus_value,
3978            _ => false,
3979        },
3980        EdgeKind::ClusterMember => match focus_kind {
3981            NodeKind::Cluster => src_value == focus_value,
3982            NodeKind::Episode => tgt_value == focus_value,
3983            _ => false,
3984        },
3985    }
3986}
3987
/// One triple row as read by `fetch_triple_edges`, carrying what is
/// needed to emit an `ep:<memory_id>` → `ent:<object_id>` edge.
#[derive(Debug)]
struct EdgeRowTriple {
    /// `triples.triple_id`.
    triple_id: String,
    /// `memory_id` of the episode joined via `triples.source_episode_id`;
    /// `None` when the triple has no resolvable source episode.
    source_memory_id: Option<String>,
    /// `triples.object_id`; becomes the edge's target entity.
    object_id: String,
    /// `triples.predicate`.
    predicate: String,
    /// `triples.confidence`.
    confidence: f32,
}
3996
3997fn fetch_triple_edges(conn: &rusqlite::Connection) -> rusqlite::Result<Vec<EdgeRowTriple>> {
3998    // Emit one edge per triple: source_episode → ent:object_id. Skip
3999    // orphan triples (`source_episode_id IS NULL`). Bound the scan at
4000    // GRAPH_EDGES_MAX_LIMIT * a safety multiplier so a runaway tenant
4001    // doesn't OOM the page-builder; the merge-and-page step trims to
4002    // the real limit downstream.
4003    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
4004    let mut stmt = conn.prepare(
4005        "SELECT t.triple_id, e.memory_id, t.object_id, t.predicate, t.confidence
4006           FROM triples t
4007           LEFT JOIN episodes e ON e.rowid = t.source_episode_id
4008          WHERE t.status = 'active'
4009          ORDER BY t.triple_id ASC
4010          LIMIT ?1",
4011    )?;
4012    let rows: Vec<EdgeRowTriple> = stmt
4013        .query_map(rusqlite::params![safety_cap], |r| {
4014            Ok(EdgeRowTriple {
4015                triple_id: r.get(0)?,
4016                source_memory_id: r.get::<_, Option<String>>(1)?,
4017                object_id: r.get(2)?,
4018                predicate: r.get(3)?,
4019                confidence: r.get(4)?,
4020            })
4021        })?
4022        .collect::<rusqlite::Result<Vec<_>>>()?;
4023    Ok(rows)
4024}
4025
/// One document→chunk containment pair as read by
/// `fetch_document_chunk_edges`.
#[derive(Debug)]
struct EdgeRowDocChunk {
    /// `document_chunks.chunk_id`.
    chunk_id: String,
    /// `document_chunks.doc_id`: the document the chunk belongs to.
    doc_id: String,
}
4031
4032fn fetch_document_chunk_edges(
4033    conn: &rusqlite::Connection,
4034) -> rusqlite::Result<Vec<EdgeRowDocChunk>> {
4035    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
4036    let mut stmt = conn.prepare(
4037        "SELECT c.chunk_id, c.doc_id
4038           FROM document_chunks c
4039           JOIN documents d ON d.doc_id = c.doc_id
4040          WHERE d.status = 'active'
4041          ORDER BY c.chunk_id ASC
4042          LIMIT ?1",
4043    )?;
4044    let rows: Vec<EdgeRowDocChunk> = stmt
4045        .query_map(rusqlite::params![safety_cap], |r| {
4046            Ok(EdgeRowDocChunk {
4047                chunk_id: r.get(0)?,
4048                doc_id: r.get(1)?,
4049            })
4050        })?
4051        .collect::<rusqlite::Result<Vec<_>>>()?;
4052    Ok(rows)
4053}
4054
/// One cluster→episode membership pair as read by
/// `fetch_cluster_member_edges`.
#[derive(Debug)]
struct EdgeRowClusterMember {
    /// `cluster_episodes.cluster_id`.
    cluster_id: String,
    /// `cluster_episodes.memory_id`: the member episode.
    memory_id: String,
}
4060
4061fn fetch_cluster_member_edges(
4062    conn: &rusqlite::Connection,
4063) -> rusqlite::Result<Vec<EdgeRowClusterMember>> {
4064    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
4065    let mut stmt = conn.prepare(
4066        "SELECT ce.cluster_id, ce.memory_id
4067           FROM cluster_episodes ce
4068           JOIN episodes e ON e.memory_id = ce.memory_id
4069          WHERE e.status = 'active'
4070          ORDER BY ce.cluster_id ASC, ce.memory_id ASC
4071          LIMIT ?1",
4072    )?;
4073    let rows: Vec<EdgeRowClusterMember> = stmt
4074        .query_map(rusqlite::params![safety_cap], |r| {
4075            Ok(EdgeRowClusterMember {
4076                cluster_id: r.get(0)?,
4077                memory_id: r.get(1)?,
4078            })
4079        })?
4080        .collect::<rusqlite::Result<Vec<_>>>()?;
4081    Ok(rows)
4082}
4083
4084/// `GET /v1/graph/edges`. Paginated edge catalog. See module-level
4085/// comments for the contract.
4086async fn graph_edges_handler(
4087    TenantExtractor(tenant): TenantExtractor,
4088    Query(q): Query<GraphEdgesQuery>,
4089) -> Result<Json<GraphEdgesResponse>, ApiError> {
4090    let limit = q.limit.unwrap_or(GRAPH_EDGES_DEFAULT_LIMIT);
4091    let limit = limit.clamp(1, GRAPH_EDGES_MAX_LIMIT);
4092    let kinds = parse_edge_kind_filter(q.r#type.as_deref())?;
4093    let cursor = match q.cursor.as_deref() {
4094        None => None,
4095        Some("") => None,
4096        Some(raw) => Some(decode_cursor::<EdgesCursor>(raw)?),
4097    };
4098
4099    let focus = match q.node_id.as_deref() {
4100        None => None,
4101        Some(raw) => {
4102            let (kind, value) = parse_node_id(raw)?;
4103            Some((kind, value.to_string()))
4104        }
4105    };
4106
4107    let want_triple = kinds.contains(&EdgeKind::Triple);
4108    let want_doc_chunk = kinds.contains(&EdgeKind::DocumentChunk);
4109    let want_cluster_member = kinds.contains(&EdgeKind::ClusterMember);
4110
4111    let staged: Vec<StagingEdge> = tenant
4112        .read()
4113        .interact(move |conn| {
4114            let mut staged: Vec<StagingEdge> = Vec::new();
4115
4116            if want_triple {
4117                for t in fetch_triple_edges(conn)? {
4118                    let src_id = match &t.source_memory_id {
4119                        Some(mid) => format!("ep:{mid}"),
4120                        None => continue, // orphan triple — skip
4121                    };
4122                    let tgt_id = format!("ent:{}", t.object_id);
4123                    if let Some((fk, fv)) = &focus {
4124                        // `src_value` for matching is the bare memory_id
4125                        // (after the `ep:` prefix); `tgt_value` is the
4126                        // bare entity value.
4127                        if !edge_touches_focus(
4128                            EdgeKind::Triple,
4129                            *fk,
4130                            fv,
4131                            t.source_memory_id.as_deref().unwrap_or(""),
4132                            &t.object_id,
4133                            // Triples carry a subject_id too, but the
4134                            // emitted edge only goes ep → ent(object).
4135                            // For entity-focus matches we also accept
4136                            // hits on subject_id; surface it through
4137                            // the `extra` slot.
4138                            None,
4139                        ) {
4140                            continue;
4141                        }
4142                    }
4143                    let edge = GraphEdge {
4144                        id: edge_id(&src_id, "triple", &tgt_id),
4145                        source: src_id,
4146                        target: tgt_id,
4147                        kind: "triple",
4148                        predicate: Some(t.predicate),
4149                        weight: Some(t.confidence),
4150                    };
4151                    staged.push(StagingEdge {
4152                        edge,
4153                        kind_idx: EdgeKind::Triple.order_idx(),
4154                        sub_id: t.triple_id,
4155                    });
4156                }
4157            }
4158            if want_doc_chunk {
4159                for dc in fetch_document_chunk_edges(conn)? {
4160                    let src_id = format!("doc:{}", dc.doc_id);
4161                    let tgt_id = format!("chunk:{}", dc.chunk_id);
4162                    if let Some((fk, fv)) = &focus {
4163                        if !edge_touches_focus(
4164                            EdgeKind::DocumentChunk,
4165                            *fk,
4166                            fv,
4167                            &dc.doc_id,
4168                            &dc.chunk_id,
4169                            None,
4170                        ) {
4171                            continue;
4172                        }
4173                    }
4174                    let edge = GraphEdge {
4175                        id: edge_id(&src_id, "document_chunk", &tgt_id),
4176                        source: src_id,
4177                        target: tgt_id,
4178                        kind: "document_chunk",
4179                        predicate: None,
4180                        weight: None,
4181                    };
4182                    staged.push(StagingEdge {
4183                        edge,
4184                        kind_idx: EdgeKind::DocumentChunk.order_idx(),
4185                        sub_id: dc.chunk_id,
4186                    });
4187                }
4188            }
4189            if want_cluster_member {
4190                for cm in fetch_cluster_member_edges(conn)? {
4191                    let src_id = format!("cl:{}", cm.cluster_id);
4192                    let tgt_id = format!("ep:{}", cm.memory_id);
4193                    if let Some((fk, fv)) = &focus {
4194                        if !edge_touches_focus(
4195                            EdgeKind::ClusterMember,
4196                            *fk,
4197                            fv,
4198                            &cm.cluster_id,
4199                            &cm.memory_id,
4200                            None,
4201                        ) {
4202                            continue;
4203                        }
4204                    }
4205                    let edge = GraphEdge {
4206                        id: edge_id(&src_id, "cluster_member", &tgt_id),
4207                        source: src_id,
4208                        target: tgt_id,
4209                        kind: "cluster_member",
4210                        predicate: None,
4211                        weight: None,
4212                    };
4213                    let sub_id = format!("{}\u{1f}{}", cm.cluster_id, cm.memory_id);
4214                    staged.push(StagingEdge {
4215                        edge,
4216                        kind_idx: EdgeKind::ClusterMember.order_idx(),
4217                        sub_id,
4218                    });
4219                }
4220            }
4221            Ok::<_, rusqlite::Error>(staged)
4222        })
4223        .await
4224        .map_err(ApiError::from)?;
4225
4226    // Apply cursor filter.
4227    let mut staged = staged;
4228    if let Some(cur) = &cursor {
4229        staged.retain(|s| edge_passes_cursor(s.kind_idx, &s.sub_id, cur));
4230    }
4231
4232    // Sort `(kind_idx ASC, sub_id ASC)` — stable, simple.
4233    staged.sort_by(|a, b| cmp_edge_sort_keys((a.kind_idx, &a.sub_id), (b.kind_idx, &b.sub_id)));
4234
4235    let limit_us = limit as usize;
4236    let next_cursor = if staged.len() > limit_us {
4237        let last = &staged[limit_us - 1];
4238        Some(EdgesCursor {
4239            kind_idx: last.kind_idx,
4240            sub_id: last.sub_id.clone(),
4241        })
4242    } else {
4243        None
4244    };
4245    staged.truncate(limit_us);
4246    let next_cursor_str = match next_cursor {
4247        Some(c) => Some(encode_cursor(&c)?),
4248        None => None,
4249    };
4250
4251    let edges: Vec<GraphEdge> = staged.into_iter().map(|s| s.edge).collect();
4252    Ok(Json(GraphEdgesResponse {
4253        edges,
4254        next_cursor: next_cursor_str,
4255    }))
4256}
4257
4258// ---------------------------------------------------------------------------
4259// Graph inspect — kind-discriminated full-record drill (v0.10.0)
4260//
4261// `GET /v1/graph/inspect/{id}` powers solo-web's right-side inspector
4262// panel. Path `id` carries the prefixed node identifier (ep:/doc:/chunk:/
4263// cl:/ent:); the handler dispatches per-kind and returns the same wire
4264// shape solo-web's `InspectResponse` expects: `{ node, full_text?,
4265// triples_in[], triples_out[] }`.
4266//
4267// Per-kind contract (v0.10.0 P1):
4268//   * `ep:<memory_id>`     full_text = episodes.content (untruncated),
4269//                          triples_in = [],
4270//                          triples_out = triples WHERE source_episode_id = rowid
4271//                          (one edge per triple, ep -> ent(object), predicate
4272//                          + weight surfaced). Episodes never appear as triple
4273//                          subjects/objects, so triples_in is structurally
4274//                          empty.
4275//   * `doc:<doc_id>`       full_text = concatenated chunk bodies separated by
4276//                          "\n\n" (no `documents.full_text` column exists; the
4277//                          chunks-concat path produces the same final text the
4278//                          ingester chunked from). triples_in/out = [] --
4279//                          documents don't directly carry triples; their
4280//                          chunks transitively do, but the inspector reaches
4281//                          those via the existing `/v1/graph/expand` drill.
4282//   * `chunk:<chunk_id>`   full_text = document_chunks.content,
4283//                          triples_in/out = [] (chunks aren't triple endpoints).
4284//   * `cl:<cluster_id>`    full_text = label + "\n\n" + abstraction
4285//                          (`semantic_abstractions.content`) when an
4286//                          abstraction exists; just the label otherwise.
4287//                          triples_in/out = [].
4288//   * `ent:<value>`        full_text = None (entities have no body),
4289//                          triples_in = [],
4290//                          triples_out = all triples where the entity appears
4291//                          as subject OR object. Capped at
4292//                          `GRAPH_INSPECT_ENTITY_TRIPLES_CAP` (50). Entities
4293//                          are synthetic -- an `ent:<value>` with zero triples
4294//                          in the tenant returns 404 (the entity exists only
4295//                          if at least one triple references it).
4296//
4297// Error semantics: 404 if the prefixed id has no row in the tenant's DB.
4298// 400 if the prefix is unknown or the body after `:` is empty (reuses
4299// `parse_node_id`). Tenant + auth are handled by the existing extractors.
4300//
4301// Lesson #30: no audit emit. Inspect is a derived read over already-
4302// audited primitives.
4303// ---------------------------------------------------------------------------
4304
/// Cap on triples returned for an entity inspect. Entities can be heavily
/// referenced ("user", "Alice"); the inspector panel only needs enough
/// for orientation. The `/v1/graph/expand?kind=triple` path delivers the
/// paginated full set when the UI needs more.
///
/// Bound as the SQL `LIMIT` parameter of the entity-inspect query, hence
/// the `i64` type.
const GRAPH_INSPECT_ENTITY_TRIPLES_CAP: i64 = 50;
4310
/// JSON body returned by `GET /v1/graph/inspect/{id}`.
#[derive(Debug, Serialize)]
struct GraphInspectResponse {
    /// The inspected node itself.
    node: GraphNode,
    /// Body text of the node; the key is left out of the serialized JSON
    /// when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    full_text: Option<String>,
    /// Triple edges pointing at the node.
    triples_in: Vec<GraphEdge>,
    /// Triple edges leaving the node.
    triples_out: Vec<GraphEdge>,
}
4319
4320/// `GET /v1/graph/inspect/{id}`. See module-level comments.
4321async fn graph_inspect_handler(
4322    TenantExtractor(tenant): TenantExtractor,
4323    Path(id): Path<String>,
4324) -> Result<Json<GraphInspectResponse>, ApiError> {
4325    let (kind, value) = parse_node_id(&id)?;
4326    let tenant_id_str = tenant.tenant_id().to_string();
4327    let value = value.to_string();
4328    let node_id_full = id;
4329    match kind {
4330        NodeKind::Episode => {
4331            inspect_episode_node(&tenant, &tenant_id_str, value, node_id_full).await
4332        }
4333        NodeKind::Document => {
4334            inspect_document_node(&tenant, &tenant_id_str, value, node_id_full).await
4335        }
4336        NodeKind::Chunk => inspect_chunk_node(&tenant, &tenant_id_str, value, node_id_full).await,
4337        NodeKind::Cluster => {
4338            inspect_cluster_node(&tenant, &tenant_id_str, value, node_id_full).await
4339        }
4340        NodeKind::Entity => inspect_entity_node(&tenant, &tenant_id_str, value, node_id_full).await,
4341    }
4342    .map(Json)
4343}
4344
4345// ---- per-kind paths ----
4346
4347async fn inspect_episode_node(
4348    tenant: &TenantHandle,
4349    tenant_id: &str,
4350    memory_id: String,
4351    node_id_full: String,
4352) -> Result<GraphInspectResponse, ApiError> {
4353    let memory_id_for_err = memory_id.clone();
4354    let memory_id_q = memory_id.clone();
4355    // Fetch the episode row + all triples sourced from it in one
4356    // interact() call to keep the connection check-out short.
4357    let fetched: Option<(ExpandedEpisode, Vec<TripleRow>)> = tenant
4358        .read()
4359        .interact(move |conn| {
4360            let ep_row: Option<(i64, i64, String)> = conn
4361                .query_row(
4362                    "SELECT rowid, ts_ms, content
4363                       FROM episodes
4364                      WHERE memory_id = ?1
4365                        AND status = 'active'",
4366                    rusqlite::params![&memory_id_q],
4367                    |r| {
4368                        Ok((
4369                            r.get::<_, i64>(0)?,
4370                            r.get::<_, i64>(1)?,
4371                            r.get::<_, String>(2)?,
4372                        ))
4373                    },
4374                )
4375                .map(Some)
4376                .or_else(|e| match e {
4377                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
4378                    other => Err(other),
4379                })?;
4380            let Some((rowid, ts_ms, content)) = ep_row else {
4381                return Ok(None);
4382            };
4383            let mut stmt = conn.prepare(
4384                "SELECT subject_id, predicate, object_id, confidence
4385                   FROM triples
4386                  WHERE source_episode_id = ?1
4387                    AND status = 'active'
4388                  ORDER BY valid_from_ms DESC",
4389            )?;
4390            let triples = stmt
4391                .query_map(rusqlite::params![rowid], |r| {
4392                    Ok(TripleRow {
4393                        subject_id: r.get(0)?,
4394                        predicate: r.get(1)?,
4395                        object_id: r.get(2)?,
4396                        confidence: r.get(3)?,
4397                    })
4398                })?
4399                .collect::<rusqlite::Result<Vec<_>>>()?;
4400            let ep = ExpandedEpisode {
4401                memory_id: memory_id_q,
4402                ts_ms,
4403                content,
4404            };
4405            Ok::<_, rusqlite::Error>(Some((ep, triples)))
4406        })
4407        .await
4408        .map_err(ApiError::from)?;
4409
4410    let (ep, triples) = fetched.ok_or_else(|| {
4411        ApiError::not_found(format!(
4412            "node_id {node_id_full:?} (memory_id {memory_id_for_err}) not found in current tenant"
4413        ))
4414    })?;
4415
4416    let node = graph_node_for_episode(tenant_id, &ep);
4417    let full_text = Some(ep.content.clone());
4418    // Triples flow from this episode (the source) to entity endpoints.
4419    // Emit one edge per triple: ep -> ent(object), predicate from the
4420    // triple, weight = confidence. This mirrors the `/v1/graph/edges`
4421    // triple-edge convention so the renderer can dedupe via composite id.
4422    let mut triples_out = Vec::with_capacity(triples.len());
4423    for t in triples {
4424        let tgt_id = format!("ent:{}", t.object_id);
4425        triples_out.push(GraphEdge {
4426            id: edge_id(&node_id_full, "triple", &tgt_id),
4427            source: node_id_full.clone(),
4428            target: tgt_id,
4429            kind: "triple",
4430            predicate: Some(t.predicate),
4431            weight: Some(t.confidence),
4432        });
4433    }
4434    Ok(GraphInspectResponse {
4435        node,
4436        full_text,
4437        triples_in: Vec::new(),
4438        triples_out,
4439    })
4440}
4441
4442async fn inspect_document_node(
4443    tenant: &TenantHandle,
4444    tenant_id: &str,
4445    doc_id: String,
4446    node_id_full: String,
4447) -> Result<GraphInspectResponse, ApiError> {
4448    let doc_id_for_err = doc_id.clone();
4449    let doc_id_q = doc_id.clone();
4450    // Fetch the document row + all chunk bodies (ORDER BY chunk_index) in
4451    // one interact() call. The chunks-concat path is the source of full_text
4452    // since the `documents` table doesn't carry the original raw text. For
4453    // v0.10.0 P1 we concatenate every chunk; pagination is the inspector
4454    // panel's responsibility if the document is very large.
4455    let fetched: Option<(ExpandedDocument, Vec<String>)> = tenant
4456        .read()
4457        .interact(move |conn| {
4458            let doc_row: Option<ExpandedDocument> = conn
4459                .query_row(
4460                    "SELECT doc_id, title, source, ingested_at_ms
4461                       FROM documents
4462                      WHERE doc_id = ?1
4463                        AND status = 'active'",
4464                    rusqlite::params![&doc_id_q],
4465                    |r| {
4466                        Ok(ExpandedDocument {
4467                            doc_id: r.get(0)?,
4468                            title: r.get(1)?,
4469                            source: r.get(2)?,
4470                            ingested_at_ms: r.get(3)?,
4471                        })
4472                    },
4473                )
4474                .map(Some)
4475                .or_else(|e| match e {
4476                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
4477                    other => Err(other),
4478                })?;
4479            let Some(doc) = doc_row else {
4480                return Ok(None);
4481            };
4482            let mut stmt = conn.prepare(
4483                "SELECT content
4484                   FROM document_chunks
4485                  WHERE doc_id = ?1
4486                  ORDER BY chunk_index ASC",
4487            )?;
4488            let chunks = stmt
4489                .query_map(rusqlite::params![&doc_id_q], |r| r.get::<_, String>(0))?
4490                .collect::<rusqlite::Result<Vec<_>>>()?;
4491            Ok::<_, rusqlite::Error>(Some((doc, chunks)))
4492        })
4493        .await
4494        .map_err(ApiError::from)?;
4495
4496    let (doc, chunks) = fetched.ok_or_else(|| {
4497        ApiError::not_found(format!(
4498            "node_id {node_id_full:?} (doc_id {doc_id_for_err}) not found in current tenant"
4499        ))
4500    })?;
4501
4502    let full_text = if chunks.is_empty() {
4503        // Document with zero chunks (e.g. mid-ingest, or an empty source).
4504        // Return None to signal "no body available" rather than an empty
4505        // string -- saves the renderer a degenerate code path.
4506        None
4507    } else {
4508        Some(chunks.join("\n\n"))
4509    };
4510
4511    Ok(GraphInspectResponse {
4512        node: graph_node_for_document(tenant_id, &doc),
4513        full_text,
4514        triples_in: Vec::new(),
4515        triples_out: Vec::new(),
4516    })
4517}
4518
4519async fn inspect_chunk_node(
4520    tenant: &TenantHandle,
4521    tenant_id: &str,
4522    chunk_id: String,
4523    node_id_full: String,
4524) -> Result<GraphInspectResponse, ApiError> {
4525    let chunk_id_for_err = chunk_id.clone();
4526    let chunk_id_q = chunk_id.clone();
4527    let row: Option<(ExpandedChunk, i64)> = tenant
4528        .read()
4529        .interact(move |conn| {
4530            conn.query_row(
4531                "SELECT c.chunk_id, c.chunk_index, c.content, c.created_at_ms
4532                   FROM document_chunks c
4533                   JOIN documents d ON d.doc_id = c.doc_id
4534                  WHERE c.chunk_id = ?1
4535                    AND d.status = 'active'",
4536                rusqlite::params![&chunk_id_q],
4537                |r| {
4538                    Ok((
4539                        ExpandedChunk {
4540                            chunk_id: r.get(0)?,
4541                            chunk_index: r.get(1)?,
4542                            content: r.get(2)?,
4543                        },
4544                        r.get::<_, i64>(3)?,
4545                    ))
4546                },
4547            )
4548            .map(Some)
4549            .or_else(|e| match e {
4550                rusqlite::Error::QueryReturnedNoRows => Ok(None),
4551                other => Err(other),
4552            })
4553        })
4554        .await
4555        .map_err(ApiError::from)?;
4556
4557    let (chunk, created_at_ms) = row.ok_or_else(|| {
4558        ApiError::not_found(format!(
4559            "node_id {node_id_full:?} (chunk_id {chunk_id_for_err}) not found in current tenant"
4560        ))
4561    })?;
4562
4563    let full_text = Some(chunk.content.clone());
4564    let mut node = graph_node_for_chunk(tenant_id, &chunk);
4565    // Mirror the `/v1/graph/nodes` chunk-row behaviour: surface
4566    // `created_at_ms` so the inspector panel has a sortable timestamp.
4567    node.ts_ms = Some(created_at_ms);
4568
4569    Ok(GraphInspectResponse {
4570        node,
4571        full_text,
4572        triples_in: Vec::new(),
4573        triples_out: Vec::new(),
4574    })
4575}
4576
4577async fn inspect_cluster_node(
4578    tenant: &TenantHandle,
4579    tenant_id: &str,
4580    cluster_id: String,
4581    node_id_full: String,
4582) -> Result<GraphInspectResponse, ApiError> {
4583    let cluster_id_for_err = cluster_id.clone();
4584    let cluster_id_q = cluster_id.clone();
4585    let row: Option<(Option<String>, i64)> = tenant
4586        .read()
4587        .interact(move |conn| {
4588            conn.query_row(
4589                "SELECT sa.content, c.created_at_ms
4590                   FROM clusters c
4591                   LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
4592                  WHERE c.cluster_id = ?1",
4593                rusqlite::params![&cluster_id_q],
4594                |r| Ok((r.get::<_, Option<String>>(0)?, r.get::<_, i64>(1)?)),
4595            )
4596            .map(Some)
4597            .or_else(|e| match e {
4598                rusqlite::Error::QueryReturnedNoRows => Ok(None),
4599                other => Err(other),
4600            })
4601        })
4602        .await
4603        .map_err(ApiError::from)?;
4604
4605    let (abstraction, created_at_ms) = row.ok_or_else(|| {
4606        ApiError::not_found(format!(
4607            "node_id {node_id_full:?} (cluster_id {cluster_id_for_err}) not found in current tenant"
4608        ))
4609    })?;
4610
4611    // full_text is "<cluster_id label>\n\n<abstraction>" when an abstraction
4612    // exists; just the label otherwise. Brief "cluster" -- the cluster
4613    // label is `clusters.cluster_id` (the user-facing label is the
4614    // abstraction; clusters don't have a `label` column).
4615    let full_text = match abstraction.as_deref() {
4616        Some(a) => Some(format!("cluster {cluster_id_for_err}\n\n{a}")),
4617        None => Some(format!("cluster {cluster_id_for_err}")),
4618    };
4619
4620    Ok(GraphInspectResponse {
4621        node: graph_node_for_cluster(
4622            tenant_id,
4623            &cluster_id_for_err,
4624            abstraction.as_deref(),
4625            created_at_ms,
4626        ),
4627        full_text,
4628        triples_in: Vec::new(),
4629        triples_out: Vec::new(),
4630    })
4631}
4632
4633async fn inspect_entity_node(
4634    tenant: &TenantHandle,
4635    tenant_id: &str,
4636    entity_value: String,
4637    node_id_full: String,
4638) -> Result<GraphInspectResponse, ApiError> {
4639    // Entities are synthetic. They "exist" only if at least one triple
4640    // references them as subject or object. Zero triples -> 404 per brief.
4641    let entity_q = entity_value.clone();
4642    let rows: Vec<TripleRow> = tenant
4643        .read()
4644        .interact(move |conn| {
4645            let mut stmt = conn.prepare(
4646                "SELECT subject_id, predicate, object_id, confidence
4647                   FROM triples
4648                  WHERE (subject_id = ?1 OR object_id = ?1)
4649                    AND status = 'active'
4650                  ORDER BY valid_from_ms DESC
4651                  LIMIT ?2",
4652            )?;
4653            stmt.query_map(
4654                rusqlite::params![&entity_q, GRAPH_INSPECT_ENTITY_TRIPLES_CAP],
4655                |r| {
4656                    Ok(TripleRow {
4657                        subject_id: r.get(0)?,
4658                        predicate: r.get(1)?,
4659                        object_id: r.get(2)?,
4660                        confidence: r.get(3)?,
4661                    })
4662                },
4663            )?
4664            .collect::<rusqlite::Result<Vec<_>>>()
4665        })
4666        .await
4667        .map_err(ApiError::from)?;
4668
4669    if rows.is_empty() {
4670        return Err(ApiError::not_found(format!(
4671            "node_id {node_id_full:?} (entity {entity_value:?}) not found in current tenant -- entities must be referenced by at least one triple to be inspectable"
4672        )));
4673    }
4674
4675    // Triples flow out FROM the entity to its counterpart. For each row
4676    // determine which side the entity appears on and emit ent:<self> ->
4677    // ent:<other>. Brief calls these triples_out (entities don't have
4678    // structural triples_in in v0.10.0 P1).
4679    let mut triples_out = Vec::with_capacity(rows.len());
4680    for t in rows {
4681        let other = if t.subject_id == entity_value {
4682            t.object_id
4683        } else {
4684            // entity_value matched on object_id; counterpart is subject.
4685            t.subject_id
4686        };
4687        let tgt_id = format!("ent:{other}");
4688        triples_out.push(GraphEdge {
4689            id: edge_id(&node_id_full, "triple", &tgt_id),
4690            source: node_id_full.clone(),
4691            target: tgt_id,
4692            kind: "triple",
4693            predicate: Some(t.predicate),
4694            weight: Some(t.confidence),
4695        });
4696    }
4697
4698    Ok(GraphInspectResponse {
4699        node: graph_node_for_entity(tenant_id, &entity_value),
4700        full_text: None,
4701        triples_in: Vec::new(),
4702        triples_out,
4703    })
4704}
4705
4706// ---------------------------------------------------------------------------
4707// Graph neighbors -- unified explicit + HNSW-semantic (v0.10.0)
4708//
4709// `GET /v1/graph/neighbors/{id}` powers solo-web's "show similar" overlay.
4710// Returns the same `GraphResponse { nodes, edges }` envelope as the rest of
4711// the family, combining:
4712//
4713//   * Explicit edges (triples / document_chunk / cluster_member) incident
4714//     to the focal node -- the same shape `/v1/graph/expand` produces for
4715//     a given (node_id, edge_kind) pair, but UNIONed across every edge kind
4716//     compatible with the focal node's kind.
4717//
4718//   * HNSW-semantic edges (cosine-similarity neighbors) -- only valid for
4719//     `ep:` (episodes) and `chunk:` (chunks); other source kinds return
4720//     400 when `kind=semantic` is requested alone, or are silently skipped
4721//     when `kind=both` is requested (explicit-only path still runs).
4722//
// Why this isn't just expand-with-a-flag: `/v1/graph/expand` takes a
// specific `kind=<edge-kind>` parameter and expands along ONE edge kind at
// a time. `/v1/graph/neighbors/{id}` UNIFIES all compatible edge kinds
// incident to the focal node into one response. Different UX (drill vs.
// overview); different API; both needed.
4728//
4729// ## Refactor decision
4730//
4731// The brief recommends extracting `expand`'s per-kind helpers into a
4732// shared module. In practice the `expand_*` async fns already do exactly
4733// what neighbors needs for the explicit path (same response shape, same
4734// tenant + auth + existence semantics). To keep the change surgical and
4735// to preserve `expand`'s existing tests byte-for-byte, neighbors **reuses
4736// the existing `expand_*` async fns directly** rather than refactoring
4737// their bodies. The explicit path is a thin orchestrator that calls every
4738// `expand_*` fn compatible with the focal node's kind and concatenates
4739// the results.
4740//
4741// ## Dedup rule (kind=both)
4742//
4743// When an edge with the same (source, target) appears in BOTH the
4744// explicit and the semantic result sets, the explicit edge wins -- the
4745// semantic edge is dropped. We dedupe by `(source, target)` (NOT by full
4746// edge id, which encodes the kind too): the rule "explicit beats
4747// semantic" only makes sense when both endpoints agree, regardless of
4748// kind. In practice this is most likely to fire when an entity-focused
4749// expand (which surfaces episodes as triple-targets) collides with a
4750// semantic search hit on the same episode pair.
4751//
4752// ## Limit policy
4753//
4754// `limit` is applied PER KIND, not total. With `limit=25` and
4755// `kind=both`, the response carries up to 25 explicit + 25 semantic
4756// edges (minus dedupe). Silent clamp at 100 (matches the rest of the
4757// `/v1/graph/*` family).
4758//
4759// ## Threshold filter
4760//
4761// `threshold` (default 0.75) filters semantic neighbors by
4762// `weight >= threshold`, where `weight = (1 - cos_distance).max(0)`. The
4763// default is conservative -- below 0.75 the renderer typically shows too
4764// many spurious edges for a useful "show similar" overlay. Callers can
4765// dial down (e.g. `?threshold=0.5`) for a broader view.
4766//
4767// See `docs/dev-log/0116-graph-neighbors-impl.md` for the design notes.
4768// ---------------------------------------------------------------------------
4769
/// Default page size when the caller omits `?limit=`. Conservative so the
/// "show similar" overlay isn't visually overwhelming on first click.
const GRAPH_NEIGHBORS_DEFAULT_LIMIT: u32 = 25;
/// Silent clamp ceiling. Matches the rest of the `/v1/graph/*` family.
/// Larger `?limit=` values are clamped down to this, not rejected.
const GRAPH_NEIGHBORS_MAX_LIMIT: u32 = 100;
/// Conservative similarity floor. Edges with `weight < threshold` are
/// dropped from the semantic result set. Used when the caller omits
/// `?threshold=`.
const GRAPH_NEIGHBORS_DEFAULT_THRESHOLD: f32 = 0.75;
4778
/// Discriminator for which neighbor kinds the caller wants. Default is
/// `both` (explicit edges + HNSW-semantic).
///
/// Deserialized from the snake_case names `explicit`, `semantic`, `both`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
#[serde(rename_all = "snake_case")]
enum GraphNeighborsKind {
    /// Only the explicit-edge path runs.
    Explicit,
    /// Only the semantic-neighbor path runs.
    Semantic,
    /// Both paths run; this is the value used when `kind` is omitted.
    #[default]
    Both,
}
4789
/// Query-string parameters accepted by `GET /v1/graph/neighbors/{id}`.
/// Every field is optional; the handler substitutes its defaults.
#[derive(Debug, Deserialize)]
struct GraphNeighborsQuery {
    /// Which neighbor kinds to return; `GraphNeighborsKind::Both` when
    /// omitted.
    #[serde(default)]
    kind: Option<GraphNeighborsKind>,
    /// Similarity floor for semantic neighbors; falls back to
    /// `GRAPH_NEIGHBORS_DEFAULT_THRESHOLD`. The handler rejects values
    /// outside `[0.0, 1.0]`.
    #[serde(default)]
    threshold: Option<f32>,
    /// Requested limit; falls back to `GRAPH_NEIGHBORS_DEFAULT_LIMIT` and is
    /// clamped into `1..=GRAPH_NEIGHBORS_MAX_LIMIT` by the handler.
    #[serde(default)]
    limit: Option<u32>,
}
4799
4800/// `GET /v1/graph/neighbors/{id}`. See module-level comments.
4801async fn graph_neighbors_handler(
4802    TenantExtractor(tenant): TenantExtractor,
4803    Path(id): Path<String>,
4804    Query(q): Query<GraphNeighborsQuery>,
4805) -> Result<Json<GraphExpandResponse>, ApiError> {
4806    let kind = q.kind.unwrap_or_default();
4807    let threshold = q.threshold.unwrap_or(GRAPH_NEIGHBORS_DEFAULT_THRESHOLD);
4808    if !(0.0..=1.0).contains(&threshold) {
4809        return Err(ApiError::bad_request(format!(
4810            "threshold must be in [0.0, 1.0]; got {threshold}"
4811        )));
4812    }
4813    // Silent clamp at GRAPH_NEIGHBORS_MAX_LIMIT -- matches expand /
4814    // nodes / edges convention. Test `neighbors_limit_clamped_at_100`
4815    // locks in the clamp policy.
4816    let limit_raw = q.limit.unwrap_or(GRAPH_NEIGHBORS_DEFAULT_LIMIT);
4817    let limit = limit_raw.clamp(1, GRAPH_NEIGHBORS_MAX_LIMIT);
4818
4819    let (node_kind, value) = parse_node_id(&id)?;
4820    let value_owned = value.to_string();
4821    let tenant_id_str = tenant.tenant_id().to_string();
4822    let node_id_full = id;
4823
4824    // Existence probe for the focal node. The explicit + semantic paths
4825    // each handle "node-found-but-zero-neighbors" gracefully (200 with
4826    // empty arrays) -- but we want a true 404 when the id resolves to no
4827    // row at all, regardless of which kind the caller asked for. This
4828    // matches the inspect endpoint's gate: a node has to exist to be
4829    // meaningfully "neighborable".
4830    ensure_neighbors_focal_exists(&tenant, node_kind, &value_owned, &node_id_full).await?;
4831
4832    // Dispatch.
4833    let (explicit_nodes, explicit_edges) = if matches!(
4834        kind,
4835        GraphNeighborsKind::Explicit | GraphNeighborsKind::Both
4836    ) {
4837        neighbors_explicit(
4838            &tenant,
4839            &tenant_id_str,
4840            node_kind,
4841            &value_owned,
4842            &node_id_full,
4843            limit as i64,
4844        )
4845        .await?
4846    } else {
4847        (Vec::new(), Vec::new())
4848    };
4849
4850    let (semantic_nodes, semantic_edges) = if matches!(
4851        kind,
4852        GraphNeighborsKind::Semantic | GraphNeighborsKind::Both
4853    ) {
4854        match neighbors_semantic(
4855            &tenant,
4856            &tenant_id_str,
4857            node_kind,
4858            &value_owned,
4859            &node_id_full,
4860            limit,
4861            threshold,
4862        )
4863        .await
4864        {
4865            Ok(parts) => parts,
4866            Err(e) => {
4867                // `kind=semantic` alone against an unsupported focal node
4868                // (doc/cl/ent) is a hard 400 -- the caller asked for ONLY
4869                // semantic neighbors and there are none possible.
4870                //
4871                // `kind=both` against an unsupported focal node silently
4872                // skips the semantic step; the explicit path still
4873                // delivers a meaningful answer. This mirrors the
4874                // pragmatic UX: clicking "show similar" on an entity
4875                // still surfaces the entity's triples without surfacing a
4876                // pointless error.
4877                if matches!(kind, GraphNeighborsKind::Semantic) {
4878                    return Err(e);
4879                }
4880                (Vec::new(), Vec::new())
4881            }
4882        }
4883    } else {
4884        (Vec::new(), Vec::new())
4885    };
4886
4887    // Merge + dedupe. Explicit edges win over semantic edges with the
4888    // same (source, target). Nodes dedupe by id.
4889    let mut explicit_endpoints: std::collections::HashSet<(String, String)> =
4890        std::collections::HashSet::with_capacity(explicit_edges.len());
4891    for e in &explicit_edges {
4892        explicit_endpoints.insert((e.source.clone(), e.target.clone()));
4893    }
4894
4895    let mut nodes: Vec<GraphNode> = Vec::with_capacity(explicit_nodes.len() + semantic_nodes.len());
4896    let mut edges: Vec<GraphEdge> = Vec::with_capacity(explicit_edges.len() + semantic_edges.len());
4897    let mut seen_node_ids: std::collections::HashSet<String> =
4898        std::collections::HashSet::with_capacity(explicit_nodes.len() + semantic_nodes.len());
4899
4900    for n in explicit_nodes {
4901        if seen_node_ids.insert(n.id.clone()) {
4902            nodes.push(n);
4903        }
4904    }
4905    for e in explicit_edges {
4906        edges.push(e);
4907    }
4908    for n in semantic_nodes {
4909        if seen_node_ids.insert(n.id.clone()) {
4910            nodes.push(n);
4911        }
4912    }
4913    for e in semantic_edges {
4914        if explicit_endpoints.contains(&(e.source.clone(), e.target.clone())) {
4915            // Explicit edge already covers this pair -- drop the semantic
4916            // duplicate per the dedup rule. The semantic node may still
4917            // remain in `nodes` if no other edge already pulled it in;
4918            // that's fine -- the renderer renders nodes with weight-less
4919            // structural edges either way.
4920            continue;
4921        }
4922        edges.push(e);
4923    }
4924
4925    Ok(Json(GraphExpandResponse { nodes, edges }))
4926}
4927
4928/// Existence probe for the focal node. Translates the prefixed id into a
4929/// per-kind COUNT query against the matching table. Returns 404 (not 200
4930/// with empty arrays) when the node doesn't exist in the tenant's DB.
4931/// For entities the "existence" check is "is this entity referenced by
4932/// at least one triple" -- consistent with the inspect-entity contract
4933/// from `0115`.
4934async fn ensure_neighbors_focal_exists(
4935    tenant: &TenantHandle,
4936    node_kind: NodeKind,
4937    value: &str,
4938    node_id_full: &str,
4939) -> Result<(), ApiError> {
4940    match node_kind {
4941        NodeKind::Episode => ensure_episode_exists(tenant, value, node_id_full).await,
4942        NodeKind::Cluster => ensure_cluster_exists(tenant, value, node_id_full).await,
4943        NodeKind::Document => ensure_document_exists(tenant, value, node_id_full).await,
4944        NodeKind::Chunk => ensure_chunk_exists(tenant, value, node_id_full).await,
4945        NodeKind::Entity => ensure_entity_referenced(tenant, value, node_id_full).await,
4946    }
4947}
4948
4949/// 404 if the chunk_id has no row in this tenant's `document_chunks`
4950/// table whose parent doc is active. Mirrors `ensure_*_exists` from
4951/// `expand`.
4952async fn ensure_chunk_exists(
4953    tenant: &TenantHandle,
4954    chunk_id: &str,
4955    node_id_full: &str,
4956) -> Result<(), ApiError> {
4957    let chunk_id_q = chunk_id.to_string();
4958    let exists: i64 = tenant
4959        .read()
4960        .interact(move |conn| {
4961            conn.query_row(
4962                "SELECT COUNT(*)
4963                   FROM document_chunks c
4964                   JOIN documents d ON d.doc_id = c.doc_id
4965                  WHERE c.chunk_id = ?1
4966                    AND d.status = 'active'",
4967                rusqlite::params![&chunk_id_q],
4968                |r| r.get(0),
4969            )
4970        })
4971        .await
4972        .map_err(ApiError::from)?;
4973    if exists == 0 {
4974        return Err(ApiError::not_found(format!(
4975            "node_id {node_id_full:?} not found in current tenant"
4976        )));
4977    }
4978    Ok(())
4979}
4980
4981/// 404 if the entity isn't referenced by at least one active triple in
4982/// the tenant. Matches the inspect-entity 404 contract: entities are
4983/// synthetic, "existence" is "shows up in at least one triple".
4984async fn ensure_entity_referenced(
4985    tenant: &TenantHandle,
4986    entity_value: &str,
4987    node_id_full: &str,
4988) -> Result<(), ApiError> {
4989    let entity_q = entity_value.to_string();
4990    let exists: i64 = tenant
4991        .read()
4992        .interact(move |conn| {
4993            conn.query_row(
4994                "SELECT COUNT(*)
4995                   FROM triples
4996                  WHERE (subject_id = ?1 OR object_id = ?1)
4997                    AND status = 'active'",
4998                rusqlite::params![&entity_q],
4999                |r| r.get(0),
5000            )
5001        })
5002        .await
5003        .map_err(ApiError::from)?;
5004    if exists == 0 {
5005        return Err(ApiError::not_found(format!(
5006            "node_id {node_id_full:?} (entity {entity_value:?}) not found in current tenant -- entities must be referenced by at least one triple to be neighborable"
5007        )));
5008    }
5009    Ok(())
5010}
5011
5012/// Explicit-neighbor path. Dispatches per focal node kind, calling the
5013/// existing `expand_*` async fns for each compatible edge kind and
5014/// concatenating the results. This is the "reuse" refactor decision:
5015/// no duplication of expand's SQL, and expand's tests stay byte-for-byte
5016/// intact because we don't touch its bodies.
5017async fn neighbors_explicit(
5018    tenant: &TenantHandle,
5019    tenant_id: &str,
5020    node_kind: NodeKind,
5021    value: &str,
5022    node_id_full: &str,
5023    limit: i64,
5024) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
5025    let mut nodes: Vec<GraphNode> = Vec::new();
5026    let mut edges: Vec<GraphEdge> = Vec::new();
5027
5028    match node_kind {
5029        NodeKind::Episode => {
5030            // Episodes have two compatible explicit-edge kinds:
5031            //   * cluster_member (episode -> clusters)
5032            //   * triple (episode -> entities, plus subj/obj entity pairs)
5033            //
5034            // document_chunk doesn't apply (episodes aren't documents).
5035            // Run each path, concat. Per-kind limit -- the caller asked for
5036            // up to `limit` neighbors PER KIND.
5037            let r1 =
5038                expand_cluster_member(tenant, tenant_id, node_kind, value, node_id_full, limit)
5039                    .await?;
5040            nodes.extend(r1.nodes);
5041            edges.extend(r1.edges);
5042            let r2 =
5043                expand_triple(tenant, tenant_id, node_kind, value, node_id_full, limit).await?;
5044            nodes.extend(r2.nodes);
5045            edges.extend(r2.edges);
5046        }
5047        NodeKind::Document => {
5048            // Documents have one compatible explicit-edge kind:
5049            // document_chunk (document -> chunks).
5050            let r = expand_document_chunk(tenant, tenant_id, node_kind, value, node_id_full, limit)
5051                .await?;
5052            nodes.extend(r.nodes);
5053            edges.extend(r.edges);
5054        }
5055        NodeKind::Chunk => {
5056            // Chunks have one compatible explicit-edge kind:
5057            // document_chunk (chunk -> parent document).
5058            let r = expand_document_chunk(tenant, tenant_id, node_kind, value, node_id_full, limit)
5059                .await?;
5060            nodes.extend(r.nodes);
5061            edges.extend(r.edges);
5062        }
5063        NodeKind::Cluster => {
5064            // Clusters have one compatible explicit-edge kind:
5065            // cluster_member (cluster -> episodes).
5066            let r = expand_cluster_member(tenant, tenant_id, node_kind, value, node_id_full, limit)
5067                .await?;
5068            nodes.extend(r.nodes);
5069            edges.extend(r.edges);
5070        }
5071        NodeKind::Entity => {
5072            // Entities have one compatible explicit-edge kind:
5073            // triple (entity -> episodes where this entity is referenced).
5074            let r = expand_triple(tenant, tenant_id, node_kind, value, node_id_full, limit).await?;
5075            nodes.extend(r.nodes);
5076            edges.extend(r.edges);
5077        }
5078    }
5079    Ok((nodes, edges))
5080}
5081
5082/// Semantic-neighbor path. Only valid for episode + chunk focal nodes;
5083/// other kinds return 400. Reuses the existing inner pipelines:
5084///
5085///   * Episodes -> `solo_query::recall::run_recall_inner` (same path
5086///     `expand_semantic` uses; filters out chunk hits).
5087///   * Chunks   -> `solo_query::doc_search::run_doc_search_inner` (the
5088///     equivalent chunk-restricted vector pipeline).
5089///
5090/// Re-embed the focal node's content for the HNSW query rather than
5091/// loading the persisted vector from `embeddings` -- the same trade-off
5092/// `expand_semantic` made: cheaper code path overall, with deterministic
5093/// embedders in tests + batch-sized embedders in prod making the recompute
5094/// cost negligible.
5095async fn neighbors_semantic(
5096    tenant: &TenantHandle,
5097    tenant_id: &str,
5098    node_kind: NodeKind,
5099    value: &str,
5100    node_id_full: &str,
5101    limit: u32,
5102    threshold: f32,
5103) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
5104    match node_kind {
5105        NodeKind::Episode => {
5106            neighbors_semantic_from_episode(
5107                tenant,
5108                tenant_id,
5109                value,
5110                node_id_full,
5111                limit,
5112                threshold,
5113            )
5114            .await
5115        }
5116        NodeKind::Chunk => {
5117            neighbors_semantic_from_chunk(tenant, tenant_id, value, node_id_full, limit, threshold)
5118                .await
5119        }
5120        _ => Err(ApiError::bad_request(format!(
5121            "semantic neighbors only valid for episode or chunk source; got {}",
5122            node_kind.as_wire_str()
5123        ))),
5124    }
5125}
5126
5127async fn neighbors_semantic_from_episode(
5128    tenant: &TenantHandle,
5129    tenant_id: &str,
5130    memory_id: &str,
5131    node_id_full: &str,
5132    limit: u32,
5133    threshold: f32,
5134) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
5135    let memory_id_q = memory_id.to_string();
5136    let memory_id_for_self_excl = memory_id.to_string();
5137    let content: Option<String> = tenant
5138        .read()
5139        .interact(move |conn| {
5140            conn.query_row(
5141                "SELECT content FROM episodes WHERE memory_id = ?1 AND status = 'active'",
5142                rusqlite::params![&memory_id_q],
5143                |r| r.get::<_, String>(0),
5144            )
5145            .map(Some)
5146            .or_else(|e| match e {
5147                rusqlite::Error::QueryReturnedNoRows => Ok(None),
5148                other => Err(other),
5149            })
5150        })
5151        .await
5152        .map_err(ApiError::from)?;
5153
5154    // Existence is guaranteed by the focal-exists probe earlier; an
5155    // empty content here would be a status-transition race we treat as
5156    // "nothing to compare against".
5157    let Some(content) = content else {
5158        return Ok((Vec::new(), Vec::new()));
5159    };
5160
5161    // Widen the request by 1 so dropping self doesn't shrink the page.
5162    let widened = (limit as usize).saturating_add(1).min(100);
5163    let result = solo_query::recall::run_recall_inner(
5164        tenant.embedder(),
5165        tenant.hnsw(),
5166        tenant.read(),
5167        &content,
5168        widened,
5169    )
5170    .await
5171    .map_err(ApiError::from)?;
5172
5173    let mut nodes = Vec::new();
5174    let mut edges = Vec::new();
5175    for hit in result.hits.into_iter() {
5176        if hit.memory_id == memory_id_for_self_excl {
5177            // Skip self.
5178            continue;
5179        }
5180        if nodes.len() as u32 >= limit {
5181            break;
5182        }
5183        let weight = (1.0 - hit.cos_distance).max(0.0);
5184        if weight < threshold {
5185            continue;
5186        }
5187        let target_id = format!("ep:{}", hit.memory_id);
5188        edges.push(GraphEdge {
5189            id: edge_id(node_id_full, "semantic", &target_id),
5190            source: node_id_full.to_string(),
5191            target: target_id,
5192            kind: "semantic",
5193            predicate: None,
5194            weight: Some(weight),
5195        });
5196        nodes.push(GraphNode {
5197            id: format!("ep:{}", hit.memory_id),
5198            kind: NodeKind::Episode.as_wire_str(),
5199            label: episode_label(&hit.content),
5200            ts_ms: None,
5201            tenant_id: tenant_id.to_string(),
5202            preview: Some(truncate_preview(&hit.content, GRAPH_PREVIEW_CHARS)),
5203        });
5204    }
5205    Ok((nodes, edges))
5206}
5207
5208async fn neighbors_semantic_from_chunk(
5209    tenant: &TenantHandle,
5210    tenant_id: &str,
5211    chunk_id: &str,
5212    node_id_full: &str,
5213    limit: u32,
5214    threshold: f32,
5215) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
5216    let chunk_id_q = chunk_id.to_string();
5217    let chunk_id_for_self_excl = chunk_id.to_string();
5218    let content: Option<String> = tenant
5219        .read()
5220        .interact(move |conn| {
5221            conn.query_row(
5222                "SELECT c.content
5223                   FROM document_chunks c
5224                   JOIN documents d ON d.doc_id = c.doc_id
5225                  WHERE c.chunk_id = ?1
5226                    AND d.status = 'active'",
5227                rusqlite::params![&chunk_id_q],
5228                |r| r.get::<_, String>(0),
5229            )
5230            .map(Some)
5231            .or_else(|e| match e {
5232                rusqlite::Error::QueryReturnedNoRows => Ok(None),
5233                other => Err(other),
5234            })
5235        })
5236        .await
5237        .map_err(ApiError::from)?;
5238
5239    let Some(content) = content else {
5240        return Ok((Vec::new(), Vec::new()));
5241    };
5242
5243    let widened = (limit as usize).saturating_add(1).min(100);
5244    let hits = solo_query::doc_search::run_doc_search_inner(
5245        tenant.embedder(),
5246        tenant.hnsw(),
5247        tenant.read(),
5248        &content,
5249        widened,
5250    )
5251    .await
5252    .map_err(ApiError::from)?;
5253
5254    let mut nodes = Vec::new();
5255    let mut edges = Vec::new();
5256    for hit in hits.into_iter() {
5257        if hit.chunk_id == chunk_id_for_self_excl {
5258            continue;
5259        }
5260        if nodes.len() as u32 >= limit {
5261            break;
5262        }
5263        let weight = (1.0 - hit.cos_distance).max(0.0);
5264        if weight < threshold {
5265            continue;
5266        }
5267        let target_id = format!("chunk:{}", hit.chunk_id);
5268        edges.push(GraphEdge {
5269            id: edge_id(node_id_full, "semantic", &target_id),
5270            source: node_id_full.to_string(),
5271            target: target_id,
5272            kind: "semantic",
5273            predicate: None,
5274            weight: Some(weight),
5275        });
5276        let exp = ExpandedChunk {
5277            chunk_id: hit.chunk_id.clone(),
5278            chunk_index: hit.chunk_index as i64,
5279            content: hit.content.clone(),
5280        };
5281        nodes.push(graph_node_for_chunk(tenant_id, &exp));
5282    }
5283    Ok((nodes, edges))
5284}
5285
5286// ---------------------------------------------------------------------------
5287// /v1/graph/stream — SSE invalidation feed (v0.10.0)
5288//
5289// Powers solo-web's live-update behaviour: instead of polling, the
5290// frontend subscribes once and refetches its pages only when the
5291// writer-actor signals "your tenant's data changed". Per scoping doc
5292// §3 Decision C, the wire format is invalidation-shaped (not row
5293// payload) — the SSE channel says "refetch the affected page" rather
5294// than streaming actual rows.
5295//
5296// Wire format:
5297//
5298//   ```
5299//   event: init
5300//   data: {"connected": true, "tenant_id": "default", "ts_ms": 1715625600000}
5301//
5302//   event: invalidate
5303//   data: {"reason": "memory.remember", "tenant_id": "default",
5304//          "ts_ms": 1715625610000, "kind": "episode"}
5305//
5306//   event: heartbeat
5307//   data: {"ts_ms": 1715625640000}
5308//   ```
5309//
5310// Heartbeat: every [`STREAM_HEARTBEAT_SECS`] seconds, regardless of
5311// whether real events fired (simpler than resetting the timer on every
5312// invalidate; the cost is a few extra bytes per minute on idle).
5313//
// Lagged subscribers (a subscriber that polled slower than 256 writes)
// are sent nothing for the lag itself: the server logs a warning and the
// client resyncs via the next real `invalidate`. Invalidation events are
// idempotent, so the missed batch reduces to a single refetch on the
// client side. No correctness loss.
5318//
5319// See `docs/dev-log/0117-graph-stream-impl.md` for the full design.
5320// ---------------------------------------------------------------------------
5321
/// Heartbeat interval, in seconds, for `/v1/graph/stream`. Fires
/// unconditionally every 30 seconds — easier to reason about than "fire
/// 30s after the last event", and keeps proxies happy without code that
/// races a reset on every invalidate.
///
/// `graph_stream_handler` passes this value to
/// `build_invalidate_stream` as `heartbeat_secs`.
pub const STREAM_HEARTBEAT_SECS: u64 = 30;

/// SSE event name emitted on connection open. Single fire; client uses
/// this to confirm the subscription is live. It is the `event:` name of
/// the first item `build_invalidate_stream` yields.
const STREAM_EVENT_INIT: &str = "init";

/// SSE event name emitted on every writer-actor commit (and on
/// `gdpr.forget_user`'s non-writer-actor cascade). Used for every
/// `InvalidateEvent` received from the broadcast channel.
const STREAM_EVENT_INVALIDATE: &str = "invalidate";

/// SSE event name emitted by the heartbeat interval.
const STREAM_EVENT_HEARTBEAT: &str = "heartbeat";
5338
5339/// `GET /v1/graph/stream` — Server-Sent Events feed of
5340/// `InvalidateEvent`s scoped to the request's tenant.
5341///
5342/// Subscribes to the per-tenant `broadcast::Sender<InvalidateEvent>`
5343/// held by `TenantHandle` (populated by `TenantHandle::open`). The
5344/// stream:
5345///
5346///   1. Emits one `event: init` line at connection open.
5347///   2. Selects between (broadcast recv) and (heartbeat tick) in a
5348///      loop, emitting `invalidate` / `heartbeat` events as either
5349///      fires.
5350///   3. Exits when the client closes the connection (axum drops the
5351///      response future) OR the broadcast Sender is dropped (tenant
5352///      shutdown).
5353///
5354/// Auth + tenant resolution mirror the rest of `/v1/graph/*`: the
5355/// `auth_middleware` returns 401 on missing bearer; the
5356/// `TenantExtractor` resolves the per-tenant DB. The handler itself
5357/// has no per-route auth logic.
5358async fn graph_stream_handler(
5359    TenantExtractor(tenant): TenantExtractor,
5360) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
5361    // Subscribe BEFORE building the init event so a writer-actor
5362    // commit that lands in the (microscopic) window between init and
5363    // the first poll is still observed. `broadcast::Receiver` buffers
5364    // up to the channel's capacity from the moment of subscribe.
5365    let rx = tenant.invalidate_sender().subscribe();
5366    let tenant_id = tenant.tenant_id().to_string();
5367    let stream = build_invalidate_stream(rx, tenant_id, STREAM_HEARTBEAT_SECS);
5368    // axum's keep-alive layer adds its own `:` comment line every
5369    // configured interval; we keep that OFF and ship our own typed
5370    // `heartbeat` event instead. The client distinguishes the two by
5371    // looking at the SSE `event:` field — typed heartbeats let solo-web
5372    // surface "connection healthy" in its UI without parsing comment
5373    // lines.
5374    Sse::new(stream).keep_alive(KeepAlive::new().interval(Duration::from_secs(3600)))
5375}
5376
/// Per-subscriber state threaded through `futures::stream::unfold`.
/// Carries the receiver + heartbeat interval + the tenant id + a
/// one-shot flag for the initial `init` event.
struct StreamState {
    /// Receiving half of the tenant's invalidate broadcast channel; each
    /// message it delivers becomes one `invalidate` SSE event.
    rx: broadcast::Receiver<InvalidateEvent>,
    /// Timer whose ticks become `heartbeat` SSE events.
    heartbeat: tokio::time::Interval,
    /// Tenant id reported in the `init` event payload.
    tenant_id: String,
    /// `true` until the first poll completes — used to gate the `init`
    /// event. Flipped to `false` after the init event yields.
    needs_init: bool,
}
5388
5389/// Build the stream of SSE [`Event`]s for one subscriber.
5390///
5391/// First yield is the `init` event. After that, the stream selects
5392/// between the broadcast receiver and a tokio interval timer that
5393/// fires every `heartbeat_secs` seconds. Lagged broadcast errors are
5394/// swallowed with a single `tracing::warn!` line — the client resyncs
5395/// on the next real invalidate (invalidation events are idempotent).
5396fn build_invalidate_stream(
5397    rx: broadcast::Receiver<InvalidateEvent>,
5398    tenant_id: String,
5399    heartbeat_secs: u64,
5400) -> impl Stream<Item = Result<Event, Infallible>> {
5401    // `tokio::time::interval_at(start, period)` starts ticking at
5402    // `start`; we set `start = now + period` so the first heartbeat
5403    // lands `heartbeat_secs` AFTER the init event. Without `interval_at`
5404    // the default `interval()` would fire immediately at t=0, racing
5405    // the init event.
5406    let start_at = tokio::time::Instant::now() + Duration::from_secs(heartbeat_secs);
5407    let heartbeat = tokio::time::interval_at(start_at, Duration::from_secs(heartbeat_secs));
5408
5409    let state = StreamState {
5410        rx,
5411        heartbeat,
5412        tenant_id,
5413        needs_init: true,
5414    };
5415    futures::stream::unfold(state, move |mut state| async move {
5416        // First-poll: yield the init event without touching the
5417        // receiver or the heartbeat. Subsequent polls fall through to
5418        // the select loop.
5419        if state.needs_init {
5420            state.needs_init = false;
5421            let init_payload = serde_json::json!({
5422                "connected": true,
5423                "tenant_id": state.tenant_id,
5424                "ts_ms": chrono::Utc::now().timestamp_millis(),
5425            });
5426            let ev = Event::default()
5427                .event(STREAM_EVENT_INIT)
5428                .json_data(init_payload)
5429                .unwrap_or_else(|_| Event::default().event(STREAM_EVENT_INIT));
5430            return Some((Ok::<Event, Infallible>(ev), state));
5431        }
5432        loop {
5433            tokio::select! {
5434                event = state.rx.recv() => {
5435                    match event {
5436                        Ok(ev) => {
5437                            let sse_event = Event::default()
5438                                .event(STREAM_EVENT_INVALIDATE)
5439                                .json_data(&ev)
5440                                .unwrap_or_else(|_| Event::default()
5441                                    .event(STREAM_EVENT_INVALIDATE));
5442                            return Some((Ok::<Event, Infallible>(sse_event), state));
5443                        }
5444                        Err(broadcast::error::RecvError::Lagged(n)) => {
5445                            tracing::warn!(
5446                                lagged = n,
5447                                "graph stream subscriber lagged; client will \
5448                                 resync on the next real invalidate"
5449                            );
5450                            // Continue receiving — do NOT yield anything
5451                            // for a lag.
5452                        }
5453                        Err(broadcast::error::RecvError::Closed) => {
5454                            tracing::debug!(
5455                                "graph stream broadcast closed; ending SSE stream"
5456                            );
5457                            return None;
5458                        }
5459                    }
5460                }
5461                _ = state.heartbeat.tick() => {
5462                    let hb_payload = serde_json::json!({
5463                        "ts_ms": chrono::Utc::now().timestamp_millis(),
5464                    });
5465                    let sse_event = Event::default()
5466                        .event(STREAM_EVENT_HEARTBEAT)
5467                        .json_data(hb_payload)
5468                        .unwrap_or_else(|_| Event::default()
5469                            .event(STREAM_EVENT_HEARTBEAT));
5470                    return Some((Ok::<Event, Infallible>(sse_event), state));
5471                }
5472            }
5473        }
5474    })
5475}
5476
5477// ---------------------------------------------------------------------------
5478// /v1/status — authenticated readiness/status (tenant-aware)
5479
/// Embedder section of the `/v1/status` response. `status_handler` fills
/// it from the tenant's embedder.
#[derive(Debug, Serialize)]
struct StatusEmbedder {
    /// Embedder name, as reported by `embedder.name()`.
    name: String,
    /// Embedder version, as reported by `embedder.version()`.
    version: String,
    /// Value of `embedder.dim()`.
    dim: usize,
    /// Lower-cased `Debug` rendering of `embedder.dtype()`.
    dtype: String,
}
5487
/// Tenant section of the `/v1/status` response, describing the tenant the
/// request resolved to.
#[derive(Debug, Serialize)]
struct StatusTenant {
    /// Id of the request's tenant.
    id: String,
    /// `true` when the registry's active-tenant list contains a record
    /// for this tenant.
    registered: bool,
    /// Status taken from the registry record; `None` when there is no
    /// record.
    status: Option<TenantStatusJson>,
    /// Quota from the registry record; `None` when there is no record or
    /// the record carries no quota.
    quota_bytes: Option<u64>,
    /// Last-access timestamp from the registry record; `None` when there
    /// is no record or the record carries no timestamp.
    last_accessed_ms: Option<i64>,
}
5496
/// MCP section of the `/v1/status` response.
#[derive(Debug, Serialize)]
struct StatusMcp {
    /// Number of entries in the server state's `mcp_sessions` collection
    /// at the time of the request.
    sessions: usize,
}
5501
/// Body of a successful `/v1/status` response, assembled by
/// `status_handler`.
#[derive(Debug, Serialize)]
struct StatusResponse {
    /// Set to `true` by `status_handler` on every successful response.
    ok: bool,
    /// Crate version baked in at compile time (`CARGO_PKG_VERSION`).
    version: &'static str,
    /// Details of the tenant the request resolved to.
    tenant: StatusTenant,
    /// Details of that tenant's embedder.
    embedder: StatusEmbedder,
    /// Number of records returned by the registry's `list_active`.
    active_tenants: usize,
    /// MCP session statistics.
    mcp: StatusMcp,
}
5511
5512async fn status_handler(
5513    State(state): State<SoloHttpState>,
5514    TenantExtractor(tenant): TenantExtractor,
5515) -> Result<Json<StatusResponse>, ApiError> {
5516    let active_tenants = state.registry.list_active().await.map_err(ApiError::from)?;
5517    let tenant_record = active_tenants
5518        .iter()
5519        .find(|record| &record.tenant_id == tenant.tenant_id());
5520    let embedder = tenant.embedder();
5521    Ok(Json(StatusResponse {
5522        ok: true,
5523        version: env!("CARGO_PKG_VERSION"),
5524        tenant: StatusTenant {
5525            id: tenant.tenant_id().to_string(),
5526            registered: tenant_record.is_some(),
5527            status: tenant_record.map(|record| TenantStatusJson::from(&record.status)),
5528            quota_bytes: tenant_record.and_then(|record| record.quota_bytes),
5529            last_accessed_ms: tenant_record.and_then(|record| record.last_accessed_ms),
5530        },
5531        embedder: StatusEmbedder {
5532            name: embedder.name().to_string(),
5533            version: embedder.version().to_string(),
5534            dim: embedder.dim(),
5535            dtype: format!("{:?}", embedder.dtype()).to_ascii_lowercase(),
5536        },
5537        active_tenants: active_tenants.len(),
5538        mcp: StatusMcp {
5539            sessions: state.mcp_sessions.len(),
5540        },
5541    }))
5542}
5543
// ---------------------------------------------------------------------------
// /v1/tenants — principal-scoped tenant list (v0.10.0 + v0.10.1 hydration)
5545//
5546// Powers solo-web's top-bar tenant picker (Decision F in
5547// `docs/dev-log/0105-solo-web-scoping.md` §3, route shape locked in §4
5548// Route 6). The endpoint is **read-only**; admin CRUD (create / delete /
5549// rename / quota change) remains CLI-only per ADR-0004 §"Admin operations".
5550// That keeps the privileged tenant-mutation surface off HTTP entirely
5551// while still letting an authenticated browser session enumerate the
5552// tenants it's allowed to see.
5553//
5554// Wire shape (200 OK):
5555//
5556//   ```json
5557//   {
5558//     "tenants": [
5559//       {
5560//         "id": "default",
5561//         "display_name": "Default tenant",
5562//         "created_at_ms": 1715625600000,
5563//         "last_accessed_ms": 1715625900000,
5564//         "status": "active",
5565//         "quota_bytes": null,
5566//         "episode_count": null,
5567//         "size_bytes": null,
5568//         "pct_used": null
5569//       }
5570//     ]
5571//   }
5572//   ```
5573//
5574// The numeric `episode_count` / `size_bytes` / `pct_used` fields were
5575// **always `null` in v0.10.0** (cost-deferred). v0.10.1 hydrates them
5576// for real via `TenantRegistry::hydrate_tenant_cost_numbers`:
5577//
5578//   * `size_bytes` — `std::fs::metadata(<data_dir>/tenants/<db>.db).len()`.
5579//     Cheap; runs for every visible tenant.
5580//   * `episode_count` — `SELECT COUNT(*) FROM episodes WHERE
5581//     status='active'` against the per-tenant SQLCipher DB.
5582//   * `pct_used` — `size_bytes * 100 / quota_bytes` (f64, capped at
5583//     100.0) when both are known; `null` if `quota_bytes` is unset.
5584//
5585// **Cap**: opening + counting N tenant DBs is N×~10ms; the first-paint
5586// budget is tight, so we cap `episode_count` hydration at
5587// `TENANTS_COUNT_HYDRATION_CAP` (50) per request. Tenants beyond the
5588// cap get `episode_count: null` and the response carries an
5589// `X-Solo-Tenants-Count-Cap-Reached: true` header so clients can fetch
5590// counts for the tail tenants out-of-band if needed (mirroring the
5591// entity-cap pattern from `/v1/graph/nodes`). `size_bytes` is not
5592// capped — it's just a `metadata` call.
5593//
5594// The CLI's `solo tenants list` retains the canonical per-tenant
5595// cost-numbers path for operators who need exhaustive data.
5596//
5597// ## Visibility filter (load-bearing — three cases)
5598//
5599// The handler reads `AuthenticatedPrincipal` out of request extensions
5600// via `MaybePrincipal` and filters the registry list before
5601// serialisation:
5602//
5603//   1. **No principal** (`MaybePrincipal(None)`) — unauthenticated
5604//      loopback path, no `[auth]` block in `solo.config.toml`. Return
5605//      every `Active` tenant. Same scope as `solo tenants list` CLI.
5606//   2. **Bearer principal** (`subject == "bearer" && claims.is_null()`,
5607//      the `AuthenticatedPrincipal::bearer` signature emitted by
5608//      `BearerValidator::validate`). Single-principal daemon — the
5609//      bearer holder is the operator, so return every `Active`
5610//      tenant. Functionally equivalent to (1) from a leakage
5611//      standpoint.
5612//   3. **OIDC principal** (any other principal — `claims` carries the
5613//      JWT object). Filter to ONLY the tenant id matching
5614//      `principal.tenant_claim`. The configured OIDC tenant_claim is
5615//      already validated to a real `TenantId` by the auth middleware
5616//      (a `MissingTenantClaim` or `InvalidTenantClaim` shorts out at
5617//      403 BEFORE this handler runs). If the claim doesn't match any
5618//      registered tenant, return `{"tenants": []}` (200 OK, NOT 404)
5619//      — don't leak whether a tenant exists by 404'ing on names
5620//      outside the principal's scope.
5621//
5622// `PendingMigration` / `PendingDelete` tenants are **excluded** from the
5623// list in every case. solo-web's tenant picker should not surface a
5624// tenant that's mid-migration or queued for hard-delete — clicking
5625// such a row would race the admin tooling. The CLI's `solo tenants
5626// list` still shows them under an explicit `--include-pending` flag
5627// (out of scope here).
5628//
5629// See `docs/dev-log/0119-tenants-list-impl.md` for the full design.
5630// ---------------------------------------------------------------------------
5631
/// One row of the `/v1/tenants` response body. Shape mirrors
/// `solo_storage::TenantRecord` for the persisted fields plus the
/// cost-numbers triple (`episode_count`, `size_bytes`, `pct_used`).
/// v0.10.0 always emitted the triple as `null`; from v0.10.1 the list
/// handler fills it from the registry's cost hydration (see the
/// per-field docs below).
///
/// Two serialisation styles coexist on purpose-built lines below:
/// fields carrying `skip_serializing_if` are *omitted* when `None`,
/// while the cost triple has no such attribute and is therefore always
/// present in the body (as `null` when unknown).
#[derive(Debug, Clone, Serialize)]
struct TenantListItem {
    /// Tenant id (e.g. `"default"`, `"alice"`). Matches the
    /// `X-Solo-Tenant` header value clients send to other routes.
    id: String,
    /// Human-readable display name set at `solo tenants create`.
    /// `None` ⇒ omit from the JSON body.
    #[serde(skip_serializing_if = "Option::is_none")]
    display_name: Option<String>,
    /// Epoch ms when this tenant was registered.
    created_at_ms: i64,
    /// Epoch ms of the most recent `TenantRegistry::get_or_open` call
    /// (v0.9.0 P1). `None` for tenants that have never been opened
    /// since the migration ran; omitted from the JSON body in that
    /// case.
    #[serde(skip_serializing_if = "Option::is_none")]
    last_accessed_ms: Option<i64>,
    /// Lifecycle status. Always `"active"` in the v0.10.0 wire (we
    /// filter `PendingMigration` / `PendingDelete` out at list time).
    /// Surfaced for forward-compat — a future `?include_pending=1`
    /// query param could relax the filter without a shape change.
    status: TenantStatusJson,
    /// Per-tenant byte quota set via `solo tenants set-quota`. `None`
    /// ⇒ unlimited, and the key is omitted from the JSON body.
    ///
    /// NOTE(review): the response example in the section comment above
    /// shows `"quota_bytes": null`, but with this attribute an unset
    /// quota is omitted rather than serialised as `null` — confirm
    /// which shape clients expect.
    #[serde(skip_serializing_if = "Option::is_none")]
    quota_bytes: Option<u64>,
    /// v0.10.1: count of `episodes WHERE status='active'`. Populated
    /// for the first `TENANTS_COUNT_HYDRATION_CAP` tenants in the
    /// response; `null` for tenants beyond the cap (in which case the
    /// response also carries `X-Solo-Tenants-Count-Cap-Reached: true`).
    /// Also `null` if the per-tenant DB file is missing or the COUNT
    /// failed.
    episode_count: Option<i64>,
    /// v0.10.1: size of the per-tenant SQLCipher DB on disk (bytes).
    /// `null` only if the file is missing or unreadable (corruption /
    /// permissions). Not affected by the cap — `std::fs::metadata` is
    /// cheap.
    size_bytes: Option<u64>,
    /// v0.10.1: `(size_bytes * 100.0 / quota_bytes)` capped at `100.0`
    /// when both `size_bytes` and `quota_bytes` are known and the quota
    /// is non-zero. `null` if `quota_bytes` is unset (no quota =
    /// unlimited) or zero, or if `size_bytes` is unknown.
    pct_used: Option<f64>,
}
5679
/// JSON-side mirror of [`TenantStatus`]. The storage enum already
/// derives `Serialize` with `#[serde(rename_all = "snake_case")]`, but
/// it is deliberately NOT serialised directly: keeping a separate
/// HTTP-side enum decouples the wire shape from the storage-side type.
/// Today both serialise identically; a future status variant added to
/// storage doesn't automatically leak onto the wire.
///
/// Only `Active` exists here because the list handler drops every
/// non-active record before any row is built.
#[derive(Debug, Clone, Copy, Serialize)]
#[serde(rename_all = "snake_case")]
enum TenantStatusJson {
    Active,
}
5691
5692impl From<&solo_storage::TenantStatus> for TenantStatusJson {
5693    fn from(s: &solo_storage::TenantStatus) -> Self {
5694        // We only ever build this enum from `Active` records (the list
5695        // handler filters at source); the match exhausts so future
5696        // variants force a compile error here, not a wire mismatch.
5697        match s {
5698            solo_storage::TenantStatus::Active => TenantStatusJson::Active,
5699            // Defensive: should be filtered upstream. Map to Active to
5700            // avoid a panic, but the handler MUST keep filtering at
5701            // source. A clippy warning catches dead branches.
5702            solo_storage::TenantStatus::PendingMigration
5703            | solo_storage::TenantStatus::PendingDelete => TenantStatusJson::Active,
5704        }
5705    }
5706}
5707
/// Response body for `GET /v1/tenants`: a single `tenants` array with
/// one [`TenantListItem`] per tenant visible to the caller. The array
/// is empty (not an error) when nothing is visible.
#[derive(Debug, Serialize)]
struct TenantsListResponse {
    /// Rows in the order the handler built them from the registry list.
    tenants: Vec<TenantListItem>,
}
5713
/// v0.10.1: maximum number of tenants whose `episode_count` we hydrate
/// per `/v1/tenants` request. Opening + counting one tenant DB is
/// ~5-10ms; capping bounds the per-request wall time to keep
/// solo-web's first-paint budget tight. Tenants beyond the cap get
/// `episode_count: null` AND the response carries
/// `X-Solo-Tenants-Count-Cap-Reached: true` so clients can fetch
/// per-tenant counts out-of-band (CLI / future per-id endpoint) for
/// the tail. The handler sets that header when the visible tenant
/// count is strictly greater than this value. The 50 figure mirrors
/// the entity-cap pattern from `/v1/graph/nodes`.
const TENANTS_COUNT_HYDRATION_CAP: usize = 50;
5724
/// v0.10.1: response header name set to `"true"` when the per-request
/// `episode_count` hydration cap was reached. Absent otherwise.
/// Grep-able by both server- and client-side code.
///
/// Must stay lowercase: the handler passes it to
/// `axum::http::HeaderName::from_static`, which only accepts lowercase
/// names. Header names are case-insensitive on the wire; the canonical
/// spelling is `X-Solo-Tenants-Count-Cap-Reached`.
const X_SOLO_TENANTS_COUNT_CAP_HEADER: &str = "x-solo-tenants-count-cap-reached";
5732
5733/// `GET /v1/tenants` — list every tenant visible to the request's
5734/// principal. See module comment for the three-case visibility rule.
5735///
5736/// Errors:
5737///   * **401** — bearer required but missing/invalid (handled by
5738///     `auth_middleware` before this handler runs).
5739///   * **500** — `TenantsIndex` read failed. Surfaced via [`ApiError`].
5740///
5741/// No 404 path. If the OIDC principal's `tenant_claim` doesn't match
5742/// any registered tenant, the response is `200 OK` with `tenants:
5743/// []`. That keeps tenant existence out of side-channel range for an
5744/// OIDC user — they cannot probe for other tenants by id.
5745async fn tenants_list_handler(
5746    State(state): State<SoloHttpState>,
5747    MaybePrincipal(maybe_principal): MaybePrincipal,
5748) -> Result<Response, ApiError> {
5749    // Pull every registered tenant. `list_active` is the registry's
5750    // wrapper around `TenantsIndex::list`, which returns rows ordered
5751    // by `(created_at_ms ASC, tenant_id ASC)` — a stable order that
5752    // doesn't shift between requests, which solo-web relies on to keep
5753    // its tenant picker entries from reordering visually.
5754    let mut records = state.registry.list_active().await.map_err(ApiError::from)?;
5755
5756    // Filter at source: status MUST be Active (PendingMigration /
5757    // PendingDelete are admin-transient states that solo-web should
5758    // not surface). Matches the brief's
5759    // `tenants_status_filter_excludes_deleted` test.
5760    records.retain(|r| matches!(r.status, solo_storage::TenantStatus::Active));
5761
5762    // Apply the principal-driven visibility filter. The three cases
5763    // are exhaustive — see the module comment for the rationale on
5764    // each. `tenant_visibility_filter` is split out so the unit
5765    // tests can assert the rule independent of the SQL read.
5766    let filtered = filter_tenants_for_principal(records, maybe_principal.as_ref());
5767
5768    // v0.10.1: hydrate cost numbers (size_bytes, episode_count). The
5769    // registry helper handles missing DB files + the cap behavior. We
5770    // pass the cap so tenants beyond it return `None` for episode_count
5771    // — `size_bytes` is computed for everyone (cheap fs::metadata).
5772    let cap = TENANTS_COUNT_HYDRATION_CAP;
5773    let costs = state
5774        .registry
5775        .hydrate_tenant_cost_numbers(&filtered, cap)
5776        .await;
5777    let cap_reached = filtered.len() > cap;
5778
5779    let tenants: Vec<TenantListItem> = filtered
5780        .iter()
5781        .zip(costs.iter())
5782        .map(|(r, cost)| {
5783            let pct_used = match (cost.size_bytes, r.quota_bytes) {
5784                (Some(size), Some(quota)) if quota > 0 => {
5785                    let raw = (size as f64) * 100.0 / (quota as f64);
5786                    Some(raw.min(100.0))
5787                }
5788                _ => None,
5789            };
5790            TenantListItem {
5791                id: r.tenant_id.to_string(),
5792                display_name: r.display_name.clone(),
5793                created_at_ms: r.created_at_ms,
5794                last_accessed_ms: r.last_accessed_ms,
5795                status: TenantStatusJson::from(&r.status),
5796                quota_bytes: r.quota_bytes,
5797                episode_count: cost.episode_count,
5798                size_bytes: cost.size_bytes,
5799                pct_used,
5800            }
5801        })
5802        .collect();
5803
5804    let body = Json(TenantsListResponse { tenants });
5805    if cap_reached {
5806        let mut resp = body.into_response();
5807        resp.headers_mut().insert(
5808            axum::http::HeaderName::from_static(X_SOLO_TENANTS_COUNT_CAP_HEADER),
5809            axum::http::HeaderValue::from_static("true"),
5810        );
5811        Ok(resp)
5812    } else {
5813        Ok(body.into_response())
5814    }
5815}
5816
5817/// Pure function: apply the three-case principal-driven visibility
5818/// rule to a list of `TenantRecord`s. Extracted from the handler so
5819/// unit tests can exercise the rule without driving an axum router.
5820///
5821///   * `principal == None` ⇒ all records returned (no-auth path).
5822///   * Bearer-shaped principal (`subject == "bearer" && claims.is_null()`)
5823///     ⇒ all records returned (single-principal daemon).
5824///   * Any other principal (OIDC) ⇒ filter to records whose
5825///     `tenant_id == principal.tenant_claim`. An OIDC principal with
5826///     no `tenant_claim` (theoretically unreachable — the middleware
5827///     short-circuits at 403 before us, but we defend) returns an
5828///     empty list.
5829fn filter_tenants_for_principal(
5830    records: Vec<solo_storage::TenantRecord>,
5831    principal: Option<&AuthenticatedPrincipal>,
5832) -> Vec<solo_storage::TenantRecord> {
5833    let Some(p) = principal else {
5834        // Case 1: no auth configured — return all tenants. Same scope
5835        // as `solo tenants list`.
5836        return records;
5837    };
5838    if is_single_principal_bearer(p) {
5839        // Case 2: bearer principal — return all tenants. The single
5840        // bearer holder is functionally the daemon operator.
5841        return records;
5842    }
5843    // Case 3: OIDC principal — filter to the claimed tenant only. An
5844    // unmatched claim falls through to an empty list, NOT 404, to
5845    // avoid leaking tenant existence.
5846    let Some(claim) = p.tenant_claim.as_ref() else {
5847        return Vec::new();
5848    };
5849    records
5850        .into_iter()
5851        .filter(|r| r.tenant_id == *claim)
5852        .collect()
5853}
5854
5855/// True iff `principal` looks like a bearer-mode principal — the shape
5856/// emitted by [`AuthenticatedPrincipal::bearer`]: subject is literally
5857/// `"bearer"`, claims is `serde_json::Value::Null`, and scopes is
5858/// empty. OIDC principals carry a JWT object in `claims` and the JWT
5859/// `sub` in `subject`, so they fail this predicate.
5860///
5861/// Split out so the unit tests can assert the discriminator
5862/// independent of the rest of the handler. Keeping the predicate in
5863/// one place also makes future expansion easier — e.g., a v0.11
5864/// "admin scope" might add an OIDC variant that passes this gate by
5865/// looking for a `"solo:admin"` entry in `scopes`.
5866fn is_single_principal_bearer(principal: &AuthenticatedPrincipal) -> bool {
5867    principal.subject == "bearer" && principal.claims.is_null() && principal.scopes.is_empty()
5868}
5869
5870// ---------------------------------------------------------------------------
5871// v0.10.2 — MCP-over-HTTP transport on /mcp
5872// ---------------------------------------------------------------------------
5873
5874// v0.11.0 P2: the per-event names that used to live here as
5875// `MCP_STREAM_EVENT_INIT` moved into `crate::mcp_session` alongside the
5876// `McpEventKind` enum so the publisher (`SessionState::publish_event`)
5877// and the subscriber (`build_mcp_session_stream`) share one source of
5878// truth for the wire format. See `MCP_STREAM_EVENT_INIT_NAME`,
5879// `MCP_STREAM_EVENT_MESSAGE_NAME`, `MCP_STREAM_EVENT_PROGRESS_NAME`,
5880// `MCP_STREAM_EVENT_LAGGED_NAME`, and `MCP_STREAM_EVENT_HEARTBEAT_NAME`
5881// for the canonical strings.
5882
5883/// `POST /mcp` — JSON-RPC request/response.
5884///
5885/// v0.10.2 P2 entry point. Per the MCP Streamable HTTP transport spec,
5886/// the body is one JSON-RPC 2.0 envelope (`{jsonrpc, id, method,
5887/// params}`). The response is one JSON-RPC envelope (`{jsonrpc, id,
5888/// result}` or `{jsonrpc, id, error}`) with `Content-Type:
5889/// application/json`. **Status 200** for valid JSON-RPC (in-body
5890/// errors); **status 400** for malformed JSON; **status 401** when
5891/// auth is configured and the bearer check fails (handled by the
5892/// `auth_middleware` ahead of this handler).
5893///
5894/// Tenant resolution diverges from `solo mcp-stdio` here: stdio binds
5895/// one tenant at process start via `--tenant`. HTTP resolves the tenant
5896/// per request from the `X-Solo-Tenant` header (or
5897/// `AuthenticatedPrincipal.tenant_claim` in OIDC mode), so a single
5898/// daemon process can answer MCP calls for any tenant the registry
5899/// knows. The audit principal is `Some("bearer")` for bearer-
5900/// authenticated calls and the JWT `sub` for OIDC; `None` for
5901/// unauthenticated loopback. Documented in v0.10.2 dev log.
5902async fn mcp_http_post_handler(
5903    TenantExtractor(tenant): TenantExtractor,
5904    State(state): State<SoloHttpState>,
5905    AuditPrincipal(principal): AuditPrincipal,
5906    request: axum::extract::Request,
5907) -> Response {
5908    // v0.11.0 P1: read the session extension the middleware planted on
5909    // a hit; if absent, this is the session-init request — create one
5910    // and echo the assigned id back via `Mcp-Session-Id`.
5911    let existing_session_id: Option<crate::mcp_session::SessionId> = request
5912        .extensions()
5913        .get::<crate::mcp_session::SessionId>()
5914        .cloned();
5915    let principal_full = request
5916        .extensions()
5917        .get::<crate::auth::AuthenticatedPrincipal>()
5918        .cloned();
5919    let body_bytes = match axum::body::to_bytes(
5920        request.into_body(),
5921        // Match the 8 MiB cap solo-api already uses for JSON bodies in
5922        // other handlers (validated by `tower-http::limit::RequestBodyLimitLayer`
5923        // elsewhere). Locally we cap at 8 MiB so a malformed Content-Length
5924        // can't OOM the dispatch task.
5925        8 * 1024 * 1024,
5926    )
5927    .await
5928    {
5929        Ok(b) => b,
5930        Err(e) => {
5931            return (
5932                StatusCode::BAD_REQUEST,
5933                Json(serde_json::json!({
5934                    "error": format!("invalid request body: {e}"),
5935                    "status": 400,
5936                })),
5937            )
5938                .into_response();
5939        }
5940    };
5941    // Parse the JSON-RPC envelope. Malformed input ⇒ 400 (the spec
5942    // calls out 4xx for malformed wire input even though JSON-RPC's own
5943    // parse-error code is in-body — operator-facing tooling needs the
5944    // HTTP status to distinguish "the server rejected the request
5945    // shape" from "the method returned an error").
5946    let request: crate::mcp_dispatch::JsonRpcRequest = match serde_json::from_slice(&body_bytes) {
5947        Ok(r) => r,
5948        Err(e) => {
5949            return (
5950                StatusCode::BAD_REQUEST,
5951                Json(serde_json::json!({
5952                    "error": format!("invalid JSON-RPC request: {e}"),
5953                    "status": 400,
5954                })),
5955            )
5956                .into_response();
5957        }
5958    };
5959    if request.jsonrpc != "2.0" {
5960        return (
5961            StatusCode::BAD_REQUEST,
5962            Json(serde_json::json!({
5963                "error": format!(
5964                    "invalid JSON-RPC request: expected jsonrpc=\"2.0\", got {:?}",
5965                    request.jsonrpc
5966                ),
5967                "status": 400,
5968            })),
5969        )
5970            .into_response();
5971    }
5972
5973    // v0.11.0 P1: assign a session id if the request arrived without
5974    // one. The assigned id is echoed back via the `Mcp-Session-Id`
5975    // response header so the client can reuse it.
5976    let (session_id, freshly_assigned) = match existing_session_id {
5977        Some(id) => (id, false),
5978        None => {
5979            let new_state =
5980                crate::mcp_session::SessionState::new(tenant.tenant_id().clone(), principal_full);
5981            let id = state.mcp_sessions.insert(new_state);
5982            (id, true)
5983        }
5984    };
5985
5986    // v0.11.0 P3: resolve the `Arc<SessionState>` for the dispatcher so
5987    // per-tool progress events can be published into the session's
5988    // broadcast channel. On a session-init request we just inserted
5989    // the state; for a continuing request the middleware planted an
5990    // Arc onto the request extensions, but we lost ownership when we
5991    // consumed the request above (`request.into_body()`). Re-fetch
5992    // via `mcp_sessions.get(&session_id)` — this is a single lock-free
5993    // DashMap shard read.
5994    let session_state: Option<std::sync::Arc<crate::mcp_session::SessionState>> =
5995        state.mcp_sessions.get(&session_id);
5996
5997    // v0.11.0 P4: on a freshly-assigned session, spawn the
5998    // invalidate-bridge task that forwards per-tenant `InvalidateEvent`
5999    // broadcasts to this session's event channel as MCP
6000    // `notifications/message` envelopes. Skipped for continuing requests
6001    // because the bridge spawned at session-init is still running (the
6002    // bridge auto-exits when the session drops from the store via the
6003    // `Weak<SessionState>` upgrade-fails path).
6004    if freshly_assigned && let Some(session_state_for_bridge) = session_state.clone() {
6005        // The JoinHandle is intentionally detached — the bridge task
6006        // owns its own exit path (Weak<SessionState> upgrade fails or
6007        // tenant broadcast closes). Holding the handle would require
6008        // a per-session reaper; the bridge's own lifecycle is enough.
6009        // `drop` is the clippy-clean way to discard a future.
6010        drop(crate::mcp_notify::spawn_invalidate_bridge(
6011            tenant.clone(),
6012            session_state_for_bridge,
6013        ));
6014    }
6015
6016    // Build the dispatcher with the resolved tenant + audit principal.
6017    // Dispatcher integration is Option B per v0.11.0 P1 plan: sessions
6018    // are HTTP-transport-only; the dispatcher stays session-agnostic.
6019    let dispatcher = crate::mcp_dispatch::McpDispatcher::new(
6020        state.registry.clone(),
6021        tenant,
6022        (*state.user_aliases).clone(),
6023        principal,
6024    );
6025
6026    let mut response = match dispatcher.dispatch(request, session_state).await {
6027        Some(response) => {
6028            // JSON-RPC errors are in-body; the HTTP status is 200 for
6029            // any valid JSON-RPC request, including ones that return an
6030            // error envelope. The client distinguishes success from
6031            // error by the presence of `result` vs `error` in the body.
6032            (StatusCode::OK, Json(response)).into_response()
6033        }
6034        None => {
6035            // Notification: per JSON-RPC 2.0 §4.1 the server MUST NOT
6036            // respond. The MCP Streamable HTTP transport spec uses
6037            // 202 Accepted for this shape so client-side polling does
6038            // not block on a body.
6039            StatusCode::ACCEPTED.into_response()
6040        }
6041    };
6042    // v0.11.0 P1: stamp the `Mcp-Session-Id` response header on every
6043    // response — both freshly-assigned (so the client learns it) and
6044    // continuing (so the client confirms the id is still valid). The
6045    // spec is loose here; echoing always is the safer client contract.
6046    crate::mcp_session::set_session_id_header(response.headers_mut(), &session_id);
6047    // Tracing hook lets operators see new-session creation rate in
6048    // `solo daemon` logs without grepping body bytes.
6049    if freshly_assigned {
6050        tracing::debug!(
6051            session_id = %session_id,
6052            "mcp-http: assigned new session id"
6053        );
6054    }
6055    response
6056}
6057
/// Heartbeat cadence, in seconds, for the resumable `/mcp` GET stream.
/// Matches the `/v1/graph/stream` discipline (30s) so operator tooling
/// can use one timeout knob. The GET handler passes this value to
/// [`build_mcp_session_stream`], whose `heartbeat_secs` parameter lets
/// tests supply a short interval instead of driving the real clock for
/// 30s.
pub const MCP_STREAM_HEARTBEAT_SECS: u64 = 30;
6064
6065/// `GET /mcp` — resumable Server-Sent Events stream for one MCP session.
6066///
6067/// v0.11.0 P2 replaces v0.10.2's `pending().await` stub with a real
6068/// `select!` loop over the session's broadcast event channel. Per the
6069/// MCP Streamable HTTP transport spec, the GET endpoint is the
6070/// server's path to push:
6071///
6072///   - `event: init` — handshake confirming the stream is live;
6073///   - `event: message` — JSON-RPC `notifications/message` (P4 bridge);
6074///   - `event: progress` — JSON-RPC `notifications/progress` (P3 long
6075///     tool calls);
6076///   - `event: heartbeat` — periodic liveness ping every
6077///     [`MCP_STREAM_HEARTBEAT_SECS`] seconds;
6078///   - `event: lagged` — emitted once when a reconnecting client's
6079///     `Last-Event-ID` is older than the broadcast buffer's oldest
6080///     retained event (Decision E).
6081///
6082/// Wire format per the SSE spec — each event carries:
6083/// `id: <u64>\nevent: <kind>\ndata: <json>\n\n`. The `id:` field is the
6084/// monotonic per-session event id; clients echo the last-seen value
6085/// back in the `Last-Event-ID` header on reconnect to drive the
6086/// replay-from-cursor path.
6087///
6088/// **Session id REQUIRED.** Unlike `POST /mcp` (which auto-creates a
6089/// session on the session-init request), `GET /mcp` returns `404 Not
6090/// Found` if the request arrived without a `Mcp-Session-Id` header.
6091/// The GET stream's whole point is to attach to an existing session's
6092/// notification channel — a client opening a stream without a session
6093/// to attach it to is a programming error, not the entry point to the
6094/// session lifecycle.
6095async fn mcp_http_get_handler(
6096    TenantExtractor(tenant): TenantExtractor,
6097    State(state): State<SoloHttpState>,
6098    AuditPrincipal(principal): AuditPrincipal,
6099    request: axum::extract::Request,
6100) -> Response {
6101    let _ = principal; // audit principal pre-resolved by extractor; unused on GET
6102    let _ = state; // session resolution lives in the middleware; state unused here
6103
6104    // v0.11.0 P2: session is REQUIRED on GET. The middleware planted
6105    // the SessionId + Arc<SessionState> extensions on a hit. If the
6106    // request arrived without an `Mcp-Session-Id` header, the
6107    // middleware passes through (so unauth'd POSTs can session-init);
6108    // we observe that as a missing extension and return 404 here.
6109    let session_id = match request.extensions().get::<crate::mcp_session::SessionId>() {
6110        Some(id) => id.clone(),
6111        None => {
6112            return (
6113                StatusCode::NOT_FOUND,
6114                Json(serde_json::json!({
6115                    "error": crate::mcp_session::MCP_SESSION_EXPIRED_ERROR,
6116                    "status": 404,
6117                    "message": "GET /mcp requires an `Mcp-Session-Id` header \
6118                                from a prior POST /mcp; open one first",
6119                    "retry": "re-initialize",
6120                })),
6121            )
6122                .into_response();
6123        }
6124    };
6125    let session_state = match request
6126        .extensions()
6127        .get::<std::sync::Arc<crate::mcp_session::SessionState>>()
6128    {
6129        Some(state) => state.clone(),
6130        None => {
6131            // Defensive: middleware should plant both extensions
6132            // together or neither, but log + 404 if we somehow see one
6133            // without the other.
6134            tracing::error!(
6135                "mcp_http_get_handler: SessionId extension present but \
6136                 SessionState extension missing — middleware bug"
6137            );
6138            return StatusCode::INTERNAL_SERVER_ERROR.into_response();
6139        }
6140    };
6141
6142    // Optional `Last-Event-ID` header — parse as u64; on parse failure
6143    // treat as `0` (the "never seen anything" sentinel) so a
6144    // malformed header doesn't 400 the reconnect.
6145    let last_event_id: u64 = request
6146        .headers()
6147        .get(crate::mcp_session::MCP_LAST_EVENT_ID_HEADER)
6148        .and_then(|v| v.to_str().ok())
6149        .and_then(|s| s.trim().parse::<u64>().ok())
6150        .unwrap_or(0);
6151
6152    let tenant_id = tenant.tenant_id().to_string();
6153    let stream = build_mcp_session_stream(
6154        session_state,
6155        session_id.clone(),
6156        tenant_id,
6157        last_event_id,
6158        MCP_STREAM_HEARTBEAT_SECS,
6159    );
6160    // No axum keep-alive comment lines — we ship our own typed
6161    // `heartbeat` event the way `/v1/graph/stream` does. Setting the
6162    // axum-side interval to 1 hour effectively disables it; clients
6163    // distinguish liveness via the typed events on the stream.
6164    let sse = Sse::new(stream).keep_alive(KeepAlive::new().interval(Duration::from_secs(3600)));
6165    let mut response = sse.into_response();
6166    crate::mcp_session::set_session_id_header(response.headers_mut(), &session_id);
6167    response
6168}
6169
/// Per-subscriber state threaded through `futures::stream::unfold` for
/// the resumable `/mcp` GET stream. Carries the broadcast receiver +
/// heartbeat ticker + the queue of replay events (if any) that need
/// to drain before live events start flowing.
///
/// Holds an `Arc<SessionState>` for the SOLE purpose of keeping the
/// broadcast `Sender` alive for as long as this subscriber's stream
/// is open. Without that strong ref, dropping the only Arc the
/// handler held would close the channel and `rx.recv()` would
/// immediately return `Err(Closed)` — the integration test for the
/// heartbeat cadence caught this regression.
struct McpStreamState {
    /// Live event receiver subscribed BEFORE the replay snapshot is
    /// taken — so any event published during the replay window lands
    /// here, and the `last_emitted_id` cursor dedupes it against the
    /// replayed copy.
    rx: broadcast::Receiver<crate::mcp_session::McpStreamEvent>,
    /// Heartbeat tick — fires every `heartbeat_secs` regardless of
    /// real-event volume. Matches `/v1/graph/stream`'s discipline.
    heartbeat: tokio::time::Interval,
    /// Replay events still to emit, in emission order, before live
    /// events take over. Empty once the replay has fully drained (and
    /// from the start when there is nothing to replay).
    replay_queue: Vec<crate::mcp_session::McpStreamEvent>,
    /// `Some(id)` once at least one event has been emitted (replayed
    /// OR live). Live broadcast events with `id <= last_emitted_id`
    /// are skipped — handles the race where an event lands in BOTH
    /// the replay snapshot AND the live broadcast receiver (because
    /// we subscribed before snapshotting).
    last_emitted_id: Option<u64>,
    /// `true` until the synthetic `event: init` has been emitted.
    /// Flipped to `false` on first poll.
    needs_init: bool,
    /// Init-event payload metadata. Pre-computed at handler entry so
    /// the unfold closure stays `Send`.
    session_id_str: String,
    /// Tenant id string for the init-event payload; pre-computed at
    /// handler entry alongside `session_id_str`.
    tenant_id: String,
    /// Held only to keep the broadcast `Sender` (and thus the channel)
    /// alive for the stream's lifetime. The session store also holds
    /// an Arc, but that one expires under TTL — this Arc keeps the
    /// channel open for this single subscriber for as long as the
    /// client is connected. Never read, hence the leading underscore.
    _session_state: std::sync::Arc<crate::mcp_session::SessionState>,
}
6213
6214/// Build the resumable SSE stream for one `/mcp` GET subscriber.
6215///
6216/// Flow per `unfold` iteration:
6217///
6218///   1. **needs_init poll** — emit one `event: init` with id 0 (we
6219///      never allocate event id 0 in `SessionState::publish_event`;
6220///      0 is reserved for the init event + the client's "never seen"
6221///      sentinel on `Last-Event-ID`). Returns immediately.
6222///   2. **replay drain** — while `replay_queue` is non-empty, pop the
6223///      front entry and emit it. Updates `last_emitted_id`.
6224///   3. **live select** — `tokio::select!` between
6225///      `rx.recv()` and `heartbeat.tick()`:
6226///      - `rx.recv() = Ok(event)` and `event.id > last_emitted_id` →
6227///        emit and update cursor;
6228///      - `rx.recv() = Ok(event)` and `event.id <= last_emitted_id` →
6229///        skip (dedupe overlap with the replayed copy);
6230///      - `rx.recv() = Err(Lagged(n))` → emit one synthetic
6231///        `event: lagged` with `data: {dropped: n}` and continue;
6232///      - `rx.recv() = Err(Closed)` → end the stream (session
6233///        dropped);
6234///      - `heartbeat.tick()` → emit an unaccounted-id `event: heartbeat`
6235///        (heartbeats DO NOT consume the session's event id space —
6236///        they're synthetic and idempotent, so a reconnecting client
6237///        doesn't need to see them in replay).
6238///
6239/// Heartbeats use SSE event id `0` (the same id space the init event
6240/// uses) and clients filter them client-side; the broadcast-channel
6241/// events use the session's real monotonic ids.
6242fn build_mcp_session_stream(
6243    session_state: std::sync::Arc<crate::mcp_session::SessionState>,
6244    session_id: crate::mcp_session::SessionId,
6245    tenant_id: String,
6246    last_event_id: u64,
6247    heartbeat_secs: u64,
6248) -> impl Stream<Item = Result<Event, Infallible>> {
6249    // 1. Subscribe BEFORE snapshotting so any event published during
6250    //    the snapshot window lands in the live receiver. We dedupe
6251    //    overlap against `last_emitted_id` below.
6252    let rx = session_state.subscribe_events();
6253
6254    // 2. Snapshot the replay buffer, then filter to events the client
6255    //    hasn't seen.
6256    let snapshot = session_state.snapshot_replay_buffer();
6257
6258    // 3. Decide replay shape based on `last_event_id` vs the snapshot.
6259    let mut replay_queue: Vec<crate::mcp_session::McpStreamEvent> = Vec::new();
6260    if last_event_id > 0 {
6261        // Client is reconnecting with a known cursor.
6262        let oldest_in_buffer = snapshot.first().map(|e| e.id);
6263        let newest_in_buffer = snapshot.last().map(|e| e.id);
6264        if let (Some(oldest), Some(newest)) = (oldest_in_buffer, newest_in_buffer) {
6265            if last_event_id + 1 < oldest {
6266                // Client missed events that have since been evicted
6267                // from the buffer. Emit one synthetic `event: lagged`
6268                // describing the gap, then resume from the buffer.
6269                let dropped = oldest.saturating_sub(last_event_id + 1);
6270                replay_queue.push(crate::mcp_session::McpStreamEvent {
6271                    id: 0,
6272                    event: crate::mcp_session::McpEventKind::Lagged,
6273                    data: serde_json::json!({
6274                        "dropped": dropped,
6275                        "last_event_id": last_event_id,
6276                        "oldest_available": oldest,
6277                    }),
6278                });
6279                replay_queue.extend(snapshot);
6280            } else if last_event_id >= newest {
6281                // Client is already caught up; nothing to replay.
6282            } else {
6283                replay_queue.extend(snapshot.into_iter().filter(|e| e.id > last_event_id));
6284            }
6285        }
6286        // Empty snapshot + non-zero last_event_id: nothing to replay.
6287    }
6288    // last_event_id == 0: brand-new subscriber; no replay needed
6289    // (the `init` event below is the start of the stream from the
6290    // client's POV).
6291
6292    let start_at = tokio::time::Instant::now() + Duration::from_secs(heartbeat_secs);
6293    let heartbeat = tokio::time::interval_at(start_at, Duration::from_secs(heartbeat_secs));
6294
6295    let stream_state = McpStreamState {
6296        rx,
6297        heartbeat,
6298        replay_queue,
6299        last_emitted_id: None,
6300        needs_init: true,
6301        session_id_str: session_id.to_string(),
6302        tenant_id,
6303        _session_state: session_state,
6304    };
6305
6306    futures::stream::unfold(stream_state, move |mut state| async move {
6307        // Phase 1: init event (one-shot).
6308        if state.needs_init {
6309            state.needs_init = false;
6310            let init_payload = serde_json::json!({
6311                "connected": true,
6312                "session_id": state.session_id_str,
6313                "tenant_id": state.tenant_id,
6314                "ts_ms": chrono::Utc::now().timestamp_millis(),
6315            });
6316            let ev = build_mcp_sse_event(0, crate::mcp_session::McpEventKind::Init, &init_payload);
6317            return Some((Ok::<Event, Infallible>(ev), state));
6318        }
6319        // Phase 2: replay-queue drain (one entry per poll).
6320        if !state.replay_queue.is_empty() {
6321            let entry = state.replay_queue.remove(0);
6322            // Lagged synthetic entries don't bump last_emitted_id —
6323            // they have id 0 and consuming them as the cursor would
6324            // cause every subsequent live event to dedupe against
6325            // them. Real events DO bump the cursor.
6326            if entry.event != crate::mcp_session::McpEventKind::Lagged {
6327                state.last_emitted_id = Some(entry.id);
6328            }
6329            let ev = build_mcp_sse_event(entry.id, entry.event, &entry.data);
6330            return Some((Ok::<Event, Infallible>(ev), state));
6331        }
6332        // Phase 3: live select loop.
6333        loop {
6334            tokio::select! {
6335                event = state.rx.recv() => {
6336                    match event {
6337                        Ok(ev) => {
6338                            // Dedupe against the replay overlap: any
6339                            // event whose id we've already emitted
6340                            // (because it was in the replay snapshot)
6341                            // gets skipped here.
6342                            if let Some(last) = state.last_emitted_id
6343                                && ev.id <= last
6344                            {
6345                                continue;
6346                            }
6347                            state.last_emitted_id = Some(ev.id);
6348                            let sse = build_mcp_sse_event(ev.id, ev.event, &ev.data);
6349                            return Some((Ok::<Event, Infallible>(sse), state));
6350                        }
6351                        Err(broadcast::error::RecvError::Lagged(n)) => {
6352                            // Live subscriber drifted past the
6353                            // broadcast buffer's capacity. Emit one
6354                            // synthetic `event: lagged` and resume —
6355                            // clients re-fetch state on this signal.
6356                            tracing::warn!(
6357                                lagged = n,
6358                                session_id = %state.session_id_str,
6359                                "mcp GET stream subscriber lagged"
6360                            );
6361                            let lagged_payload = serde_json::json!({
6362                                "dropped": n,
6363                            });
6364                            let sse = build_mcp_sse_event(
6365                                0,
6366                                crate::mcp_session::McpEventKind::Lagged,
6367                                &lagged_payload,
6368                            );
6369                            return Some((Ok::<Event, Infallible>(sse), state));
6370                        }
6371                        Err(broadcast::error::RecvError::Closed) => {
6372                            tracing::debug!(
6373                                session_id = %state.session_id_str,
6374                                "mcp GET stream broadcast closed; ending SSE stream"
6375                            );
6376                            return None;
6377                        }
6378                    }
6379                }
6380                _ = state.heartbeat.tick() => {
6381                    let hb_payload = serde_json::json!({
6382                        "ts_ms": chrono::Utc::now().timestamp_millis(),
6383                    });
6384                    let sse = build_mcp_sse_event(
6385                        0,
6386                        crate::mcp_session::McpEventKind::Heartbeat,
6387                        &hb_payload,
6388                    );
6389                    return Some((Ok::<Event, Infallible>(sse), state));
6390                }
6391            }
6392        }
6393    })
6394}
6395
6396/// Build an SSE [`Event`] from a `(id, kind, payload)` triple. Falls
6397/// back to an event-only frame on JSON serialisation failure (matches
6398/// `/v1/graph/stream`'s defensive pattern).
6399fn build_mcp_sse_event(
6400    id: u64,
6401    kind: crate::mcp_session::McpEventKind,
6402    data: &serde_json::Value,
6403) -> Event {
6404    Event::default()
6405        .id(id.to_string())
6406        .event(kind.as_str())
6407        .json_data(data)
6408        .unwrap_or_else(|_| Event::default().id(id.to_string()).event(kind.as_str()))
6409}
6410
6411// ---------------------------------------------------------------------------
6412// Error mapping
6413// ---------------------------------------------------------------------------
6414
/// Error value returned by the HTTP handlers.
///
/// Its `IntoResponse` impl renders it as a JSON body of the form
/// `{"error": <message>, "status": <numeric status>}` sent with
/// `status` as the HTTP status code.
#[derive(Debug)]
pub struct ApiError {
    /// HTTP status code the response is sent with.
    status: StatusCode,
    /// Human-readable description placed in the body's `error` field.
    message: String,
}
6420
6421impl ApiError {
6422    fn bad_request(msg: impl Into<String>) -> Self {
6423        Self {
6424            status: StatusCode::BAD_REQUEST,
6425            message: msg.into(),
6426        }
6427    }
6428    fn not_found(msg: impl Into<String>) -> Self {
6429        Self {
6430            status: StatusCode::NOT_FOUND,
6431            message: msg.into(),
6432        }
6433    }
6434    fn internal(msg: impl Into<String>) -> Self {
6435        Self {
6436            status: StatusCode::INTERNAL_SERVER_ERROR,
6437            message: msg.into(),
6438        }
6439    }
6440}
6441
6442impl From<solo_core::Error> for ApiError {
6443    fn from(e: solo_core::Error) -> Self {
6444        use solo_core::Error;
6445        match e {
6446            Error::NotFound(msg) => ApiError::not_found(msg),
6447            Error::InvalidInput(msg) => ApiError::bad_request(msg),
6448            Error::Conflict(msg) => Self {
6449                status: StatusCode::CONFLICT,
6450                message: msg,
6451            },
6452            other => ApiError::internal(other.to_string()),
6453        }
6454    }
6455}
6456
6457impl IntoResponse for ApiError {
6458    fn into_response(self) -> Response {
6459        let body = serde_json::json!({
6460            "error": self.message,
6461            "status": self.status.as_u16(),
6462        });
6463        (self.status, Json(body)).into_response()
6464    }
6465}
6466
6467// SQL helper for recall used to live here; consolidated into
6468// solo_query::recall.
6469
6470#[cfg(test)]
6471mod handler_tests {
6472    //! In-process integration tests for the HTTP handler surface. We
6473    //! drive the axum Router directly via `tower::ServiceExt::oneshot`
6474    //! — no real TCP listener needed. Same `Harness`-shape as the MCP
6475    //! tests: real WriterActor + ReaderPool + StubEmbedder + StubVectorIndex.
6476    //!
6477    //! Tests live inline in this module rather than in a `tests/` dir
6478    //! because external integration-test exes triggered Windows UAC
6479    //! ERROR_ELEVATION_REQUIRED on the dev machine.
6480    use super::*;
6481    use axum::body::Body;
6482    use axum::http::{Request, StatusCode};
6483    use http_body_util::BodyExt;
6484    use serde_json::{Value, json};
6485    use solo_core::VectorIndex;
6486    use solo_storage::test_support::StubVectorIndex;
6487    use solo_storage::{
6488        EmbedderConfig, IdentityConfig, KeyMaterial, ReaderPool, SoloConfig, StubEmbedder,
6489        TenantHandle, TenantRegistry, WriterActor, WriterSpawn,
6490    };
6491    use std::sync::Arc as StdArc;
6492    use tower::ServiceExt;
6493
6494    fn fake_config(dim: u32) -> SoloConfig {
6495        SoloConfig {
6496            schema_version: 1,
6497            salt_hex: "00000000000000000000000000000000".to_string(),
6498            embedder: EmbedderConfig {
6499                name: "stub".to_string(),
6500                version: "v1".to_string(),
6501                dim,
6502                dtype: "f32".to_string(),
6503            },
6504            identity: IdentityConfig::default(),
6505            documents: solo_storage::DocumentConfig::default(),
6506            auth: None,
6507            audit: solo_storage::AuditSettings::default(),
6508            redaction: solo_storage::RedactionConfig::default(),
6509            llm: None,
6510            triples: solo_storage::TriplesConfig::default(),
6511            sampling: solo_storage::SamplingConfig::default(),
6512            steward: solo_storage::StewardSettings::default(),
6513        }
6514    }
6515
    /// Test fixture: the router under test plus every resource that must
    /// stay alive (or be torn down in order) for the duration of a test.
    struct Harness {
        /// Router under test; tests clone it for each request.
        router: axum::Router,
        /// Temp directory that contains the test DB. Held so the
        /// directory lives as long as the harness.
        _tmp: tempfile::TempDir,
        /// Path of the test DB file inside `_tmp`.
        db_path: std::path::PathBuf,
        /// The harness's own clone of the writer's `WriteHandle`;
        /// taken and dropped in `shutdown`.
        write_handle_extra: Option<solo_storage::WriteHandle>,
        /// Join handle of the writer thread; `shutdown` waits on it.
        join: Option<std::thread::JoinHandle<()>>,
        /// v0.10.0: handle to the per-tenant TenantHandle so SSE-flavoured
        /// tests can call `harness.invalidate_sender().send(...)` to
        /// simulate writer-actor invalidations (or grab a Receiver via
        /// `.subscribe()` for subscriber-count assertions).
        tenant_handle: StdArc<TenantHandle>,
        /// v0.10.0: clone of the registry Arc so `/v1/tenants` tests can
        /// seed additional tenant rows into the in-memory tenants_index
        /// stub via `registry.with_index(|idx| idx.register(...))`.
        registry: StdArc<TenantRegistry>,
        /// v0.11.0 P1: clone of the per-process MCP session store so
        /// tests can simulate TTL eviction (`delete` an id) without
        /// having to drive the full 30-min inactivity clock.
        mcp_sessions: crate::mcp_session::SessionStore,
    }
6536
6537    impl Harness {
6538        /// v0.10.0: clone the per-tenant broadcast Sender so tests can
6539        /// fire `InvalidateEvent`s directly without going through the
6540        /// writer-actor. The harness's writer is spawned via
6541        /// `WriterActor::spawn_full` (legacy variant, no invalidate
6542        /// plumb) so writer-driven events won't reach SSE subscribers
6543        /// in tests — tests use this Sender to simulate them.
6544        fn invalidate_sender(&self) -> tokio::sync::broadcast::Sender<InvalidateEvent> {
6545            self.tenant_handle.invalidate_sender().clone()
6546        }
6547    }
6548
6549    impl Harness {
6550        fn new(runtime: &tokio::runtime::Runtime) -> Self {
6551            Self::new_with_auth(runtime, None)
6552        }
6553
6554        /// Open a fresh side connection against the harness's DB. Used
6555        /// by graph_expand tests to seed clusters / triples / documents
6556        /// directly (the writer-actor doesn't expose those write paths).
6557        fn open_db(&self) -> rusqlite::Connection {
6558            solo_storage::test_support::open_test_db_at(&self.db_path)
6559        }
6560
6561        fn new_with_auth(runtime: &tokio::runtime::Runtime, bearer_token: Option<String>) -> Self {
6562            Self::new_with_auth_config(
6563                runtime,
6564                bearer_token.map(|token| crate::auth::AuthConfig::Bearer { token }),
6565            )
6566        }
6567
        /// Assemble the full harness: a temp-dir DB, stub embedder and
        /// vector index (dimension 16), a writer actor, a reader pool, a
        /// single-tenant registry, an MCP session store, and the router
        /// built from them with the given `auth` configuration.
        ///
        /// `runtime` is used to construct the pieces that need a tokio
        /// context (the reader pool and the session store).
        fn new_with_auth_config(
            runtime: &tokio::runtime::Runtime,
            auth: Option<crate::auth::AuthConfig>,
        ) -> Self {
            use solo_storage::embedder_registry::{EmbedderIdentity, get_or_insert_embedder_id};

            let tmp = tempfile::TempDir::new().unwrap();
            let dim = 16usize;
            let hnsw: StdArc<dyn VectorIndex + Send + Sync> =
                StdArc::new(StubVectorIndex::new(dim));
            let embedder: StdArc<dyn solo_core::Embedder> =
                StdArc::new(StubEmbedder::new("stub", "v1", dim));
            let path = tmp.path().join("test.db");

            // Register the stub embedder's identity on a short-lived
            // connection; the connection is dropped at the end of this
            // block, before the writer's connection is opened.
            let embedder_id = {
                let conn = solo_storage::test_support::open_test_db_at(&path);
                get_or_insert_embedder_id(
                    &conn,
                    &EmbedderIdentity {
                        name: "stub".into(),
                        version: "v1".into(),
                        dim: dim as u32,
                        dtype: "f32".into(),
                    },
                )
                .unwrap()
            };

            // Second connection: handed to the writer actor.
            let conn = solo_storage::test_support::open_test_db_at(&path);
            let WriterSpawn { handle, join } =
                WriterActor::spawn_full(conn, hnsw.clone(), tmp.path().to_path_buf(), embedder_id);
            let pool: ReaderPool =
                runtime.block_on(async { ReaderPool::new(&path, None, hnsw.clone()).unwrap() });

            // Build a TenantHandle from the assembled parts and wrap it
            // in a single-tenant test registry.
            let tenant_id = solo_core::TenantId::default_tenant();
            let tenant_handle = StdArc::new(TenantHandle::from_parts_for_tests(
                tenant_id.clone(),
                fake_config(dim as u32),
                path.clone(),
                tmp.path().to_path_buf(),
                embedder_id,
                hnsw,
                embedder.clone(),
                handle.clone(),
                // The harness owns ANOTHER WriteHandle clone + the join.
                // We give the TenantHandle a dummy join that immediately
                // returns — it never gets joined because shutdown_all
                // can't get exclusive Arc ownership when the harness
                // also holds a writer clone.
                std::thread::spawn(|| {}),
                pool,
            ));
            // Keep one Arc for the `Harness` field; the original moves
            // into the registry below.
            let tenant_handle_clone = tenant_handle.clone();

            // Suppress the auto-spawned dummy thread by letting it finish.
            // We DON'T put the real `join` into the TenantHandle because
            // we keep our own clone of `handle` for the shutdown path.
            let key = KeyMaterial::from_bytes_for_tests([0u8; 32]);
            let registry = StdArc::new(TenantRegistry::for_tests_with_single_tenant(
                tmp.path().to_path_buf(),
                key,
                embedder,
                tenant_handle,
            ));
            // Keep one Arc for the `Harness` field; the original moves
            // into the router state below.
            let registry_clone = registry.clone();

            // v0.11.0 P1: build the MCP session store inside the
            // harness runtime so the background sweep task's
            // `tokio::spawn` finds a runtime context. The store is
            // cheap to construct; the spawn happens once on `new()`.
            let mcp_sessions = runtime.block_on(async { crate::mcp_session::SessionStore::new() });
            let mcp_sessions_clone = mcp_sessions.clone();
            let state = SoloHttpState {
                registry,
                default_tenant: tenant_id,
                user_aliases: Arc::new(Vec::new()),
                mcp_sessions,
            };
            let router = router_with_auth_config(state, auth);
            Harness {
                router,
                _tmp: tmp,
                db_path: path,
                write_handle_extra: Some(handle),
                join: Some(join),
                tenant_handle: tenant_handle_clone,
                registry: registry_clone,
                mcp_sessions: mcp_sessions_clone,
            }
        }
6660
        /// Tear the harness down and verify the writer thread exits.
        ///
        /// Drops every handle the harness owns inside the runtime
        /// context, then waits up to 5 seconds for the writer thread's
        /// join handle. Panics if the thread does not exit in time or
        /// if it panicked.
        fn shutdown(mut self, runtime: &tokio::runtime::Runtime) {
            let join = self.join.take();
            let extra = self.write_handle_extra.take();
            // v0.10.0: the new `tenant_handle` Harness field holds another
            // `Arc<TenantHandle>` that owns its own WriteHandle clone.
            // We must drop our reference here so the inner WriteHandle
            // can be released when the registry drops below. Without
            // this, the writer thread's mpsc never closes and the join
            // times out at 5s.
            let tenant_handle = self.tenant_handle;
            // v0.10.0: same story for the new `registry` Arc clone the
            // tenants-list tests use to seed extra index rows — the
            // state inside the router holds one Arc, this is the
            // other; both must drop before the underlying registry
            // dies and releases its index-mutex / cached handles.
            let registry = self.registry;
            runtime.block_on(async move {
                drop(extra);
                drop(tenant_handle); // drop Harness's direct tenant Arc
                drop(registry); // drop Harness's direct registry Arc
                drop(self.router); // drops state → drops pool inside runtime ctx
                drop(self._tmp);
                if let Some(join) = join {
                    // `JoinHandle::join` blocks with no timeout, so a
                    // helper thread performs the join and reports back
                    // over a channel; the receive side applies the 5s
                    // limit on a blocking task so the async worker is
                    // not stalled.
                    let (tx, rx) = std::sync::mpsc::channel();
                    std::thread::spawn(move || {
                        let _ = tx.send(join.join());
                    });
                    tokio::task::spawn_blocking(move || {
                        rx.recv_timeout(std::time::Duration::from_secs(5))
                    })
                    .await
                    .expect("blocking task")
                    .expect("writer thread did not exit within 5s")
                    .expect("writer thread panicked");
                }
            });
        }
6698    }
6699
6700    fn rt() -> tokio::runtime::Runtime {
6701        tokio::runtime::Builder::new_multi_thread()
6702            .worker_threads(2)
6703            .enable_all()
6704            .build()
6705            .unwrap()
6706    }
6707
6708    /// Issue one HTTP request through the router and capture status +
6709    /// JSON body. `body` may be `None` for GET/DELETE; `auth` adds an
6710    /// `Authorization` header value verbatim (e.g. `"Bearer xyz"`).
6711    async fn call(
6712        router: axum::Router,
6713        method: &str,
6714        uri: &str,
6715        body: Option<Value>,
6716    ) -> (StatusCode, Value) {
6717        call_with_auth(router, method, uri, body, None).await
6718    }
6719
6720    async fn call_with_auth(
6721        router: axum::Router,
6722        method: &str,
6723        uri: &str,
6724        body: Option<Value>,
6725        auth: Option<&str>,
6726    ) -> (StatusCode, Value) {
6727        let mut req_builder = Request::builder()
6728            .method(method)
6729            .uri(uri)
6730            .header("content-type", "application/json");
6731        if let Some(a) = auth {
6732            req_builder = req_builder.header("authorization", a);
6733        }
6734        let req = if let Some(b) = body {
6735            let bytes = serde_json::to_vec(&b).unwrap();
6736            req_builder.body(Body::from(bytes)).unwrap()
6737        } else {
6738            req_builder = req_builder.header("content-length", "0");
6739            req_builder.body(Body::empty()).unwrap()
6740        };
6741        let resp = router.oneshot(req).await.expect("oneshot");
6742        let status = resp.status();
6743        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
6744        let v: Value = if body_bytes.is_empty() {
6745            Value::Null
6746        } else {
6747            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
6748        };
6749        (status, v)
6750    }
6751
6752    async fn call_with_tenant(
6753        router: axum::Router,
6754        method: &str,
6755        uri: &str,
6756        body: Option<Value>,
6757        tenant: &str,
6758    ) -> (StatusCode, Value) {
6759        let mut req_builder = Request::builder()
6760            .method(method)
6761            .uri(uri)
6762            .header("content-type", "application/json")
6763            .header("x-solo-tenant", tenant);
6764        let req = if let Some(b) = body {
6765            let bytes = serde_json::to_vec(&b).unwrap();
6766            req_builder.body(Body::from(bytes)).unwrap()
6767        } else {
6768            req_builder = req_builder.header("content-length", "0");
6769            req_builder.body(Body::empty()).unwrap()
6770        };
6771        let resp = router.oneshot(req).await.expect("oneshot");
6772        let status = resp.status();
6773        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
6774        let v: Value = if body_bytes.is_empty() {
6775            Value::Null
6776        } else {
6777            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
6778        };
6779        (status, v)
6780    }
6781
6782    #[test]
6783    fn health_returns_ok() {
6784        let runtime = rt();
6785        let h = Harness::new(&runtime);
6786        let r = h.router.clone();
6787        let (status, _body) = runtime.block_on(call(r, "GET", "/health", None));
6788        assert_eq!(status, StatusCode::OK);
6789        h.shutdown(&runtime);
6790    }
6791
    /// `GET /openapi.json` returns a parseable OpenAPI 3.x document that
    /// lists every path in the expected-route table below, the HTTP
    /// methods on the multi-method paths, the component schemas those
    /// paths reference, and the `bearerAuth` security scheme.
    /// Acts as a drift detector: if a future commit adds/removes a route
    /// without updating `openapi_spec`, this test fails loudly.
    #[test]
    fn openapi_json_describes_all_endpoints() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let r = h.router.clone();
        let (status, spec) = runtime.block_on(call(r, "GET", "/openapi.json", None));
        assert_eq!(status, StatusCode::OK);
        assert!(spec.is_object(), "openapi.json must be a JSON object");

        // Top-level shape: any OpenAPI 3.x version string is accepted.
        assert!(
            spec.get("openapi")
                .and_then(|v| v.as_str())
                .is_some_and(|s| s.starts_with("3.")),
            "missing or wrong openapi version: {spec}"
        );
        assert!(spec.pointer("/info/title").is_some());
        assert!(spec.pointer("/info/version").is_some());

        // Every route the router serves must be documented.
        let paths = spec
            .get("paths")
            .and_then(|v| v.as_object())
            .expect("paths must be an object");
        for expected in [
            "/health",
            "/openapi.json",
            "/memory",
            "/memory/search",
            "/memory/context",
            "/memory/consolidate",
            "/memory/{id}",
            "/backup",
            // Path 1 derived-layer endpoints (v0.4.0+):
            "/memory/themes",
            "/memory/facts_about",
            "/memory/entities",
            "/memory/contradictions",
            "/memory/contradictions/resolve",
            // v0.5.0 Priority 3:
            "/memory/clusters/{cluster_id}",
            // v0.7.0 P6 — document operations:
            "/memory/documents",
            "/memory/documents/search",
            "/memory/documents/{id}",
            // v0.9.x/v0.10.x solo-web graph + tenant endpoints:
            "/v1/graph/expand",
            "/v1/graph/nodes",
            "/v1/graph/edges",
            "/v1/graph/inspect/{id}",
            "/v1/graph/neighbors/{id}",
            "/v1/graph/stream",
            "/v1/status",
            "/v1/tenants",
            // v0.10.2+ MCP Streamable HTTP transport:
            "/mcp",
        ] {
            assert!(
                paths.contains_key(expected),
                "openapi paths missing {expected}: {paths:?}"
            );
        }

        // Method coverage on /memory/documents: must document both POST
        // (ingest) and GET (list).
        let docs = paths.get("/memory/documents").expect("/memory/documents");
        assert!(
            docs.get("post").is_some(),
            "POST /memory/documents undocumented"
        );
        assert!(
            docs.get("get").is_some(),
            "GET /memory/documents undocumented"
        );

        // Method coverage on /memory/documents/{id}: must document both
        // GET (inspect) and DELETE (forget).
        let docid = paths
            .get("/memory/documents/{id}")
            .expect("/memory/documents/{id}");
        assert!(
            docid.get("get").is_some(),
            "GET /memory/documents/{{id}} undocumented"
        );
        assert!(
            docid.get("delete").is_some(),
            "DELETE /memory/documents/{{id}} undocumented"
        );

        // Method coverage on /memory/{id}: must document GET (inspect),
        // PATCH (update), and DELETE (forget).
        let memid = paths.get("/memory/{id}").expect("memory/{id}");
        assert!(
            memid.get("get").is_some(),
            "GET /memory/{{id}} undocumented"
        );
        assert!(
            memid.get("patch").is_some(),
            "PATCH /memory/{{id}} undocumented"
        );
        assert!(
            memid.get("delete").is_some(),
            "DELETE /memory/{{id}} undocumented"
        );

        // Component schemas referenced from paths must be defined.
        for schema_name in [
            "RememberRequest",
            "RememberResponse",
            "RecallRequest",
            "RecallResult",
            "MemoryContextRequest",
            "MemoryContextResult",
            "MemoryUpdateRequest",
            "MemoryUpdateResult",
            "EpisodeRecord",
            "ApiError",
            "ConsolidationScope",
            "ConsolidationReport",
            // Path 1 derived-layer schemas (v0.4.0+):
            "ThemeHit",
            "FactHit",
            "EntityHit",
            "ContradictionHit",
            "ContradictionResolveRequest",
            "ContradictionResolution",
            // v0.5.0 Priority 3:
            "ClusterRecord",
            // v0.7.0 P6 — document schemas:
            "IngestDocumentRequest",
            "IngestReport",
            "ForgetDocumentReport",
            "SearchDocsRequest",
            "DocSearchHit",
            "DocumentInspectResult",
            "DocumentSummary",
            // solo-web graph + tenant schemas:
            "GraphNode",
            "GraphEdge",
            "GraphResponse",
            "GraphNodesResponse",
            "GraphEdgesResponse",
            "GraphInspectResponse",
            "TenantListItem",
            "TenantsListResponse",
            "StatusResponse",
            // MCP HTTP JSON-RPC schemas:
            "JsonRpcRequest",
            "JsonRpcResponse",
        ] {
            let ptr = format!("/components/schemas/{schema_name}");
            assert!(
                spec.pointer(&ptr).is_some(),
                "component schema {schema_name} missing"
            );
        }

        // Method coverage on /mcp: both POST and GET must be documented.
        let mcp = paths.get("/mcp").expect("/mcp");
        assert!(mcp.get("post").is_some(), "POST /mcp undocumented");
        assert!(mcp.get("get").is_some(), "GET /mcp undocumented");

        let tenants = paths.get("/v1/tenants").expect("/v1/tenants");
        assert!(tenants.get("get").is_some(), "GET /v1/tenants undocumented");

        // GET /v1/status must reference the StatusResponse schema for its
        // 200 JSON response (`~1` is the JSON-pointer escape for `/`).
        let status_path = paths.get("/v1/status").expect("/v1/status");
        let status_get = status_path.get("get").expect("GET /v1/status undocumented");
        assert_eq!(
            status_get.pointer("/responses/200/content/application~1json/schema/$ref"),
            Some(&json!("#/components/schemas/StatusResponse")),
            "GET /v1/status must return StatusResponse"
        );

        // StatusResponse must mark each of these top-level fields as
        // required.
        let status_schema = spec
            .pointer("/components/schemas/StatusResponse")
            .expect("StatusResponse schema");
        for field in [
            "ok",
            "version",
            "tenant",
            "embedder",
            "active_tenants",
            "mcp",
        ] {
            assert!(
                status_schema
                    .pointer("/required")
                    .and_then(|v| v.as_array())
                    .is_some_and(|required| required.iter().any(|v| v == field)),
                "StatusResponse missing required field {field}"
            );
        }
        // Nested objects must declare their own `required` lists, and
        // the numeric fields must declare a `minimum`.
        for ptr in [
            "/properties/tenant/required",
            "/properties/embedder/required",
            "/properties/mcp/required",
            "/properties/embedder/properties/dim/minimum",
            "/properties/mcp/properties/sessions/minimum",
        ] {
            assert!(
                status_schema.pointer(ptr).is_some(),
                "StatusResponse schema missing {ptr}"
            );
        }

        // bearerAuth security scheme is declared (LAN deployments need it).
        assert!(
            spec.pointer("/components/securitySchemes/bearerAuth")
                .is_some(),
            "bearerAuth security scheme missing"
        );

        h.shutdown(&runtime);
    }
7009
7010    /// `/openapi.json` must remain unauthenticated even when bearer auth
7011    /// is enabled — the spec describes the API shape, not secrets, and
7012    /// codegen tooling shouldn't need a credential to fetch it.
7013    #[test]
7014    fn openapi_json_is_exempt_from_bearer_auth() {
7015        let runtime = rt();
7016        let h = Harness::new_with_auth(&runtime, Some("super-secret".into()));
7017        let r = h.router.clone();
7018        // No Authorization header → still 200 for /openapi.json.
7019        let (status, _body) = runtime.block_on(call(r, "GET", "/openapi.json", None));
7020        assert_eq!(status, StatusCode::OK);
7021        h.shutdown(&runtime);
7022    }
7023
7024    #[test]
7025    fn remember_returns_memory_id() {
7026        let runtime = rt();
7027        let h = Harness::new(&runtime);
7028        let r = h.router.clone();
7029        let (status, body) = runtime.block_on(call(
7030            r,
7031            "POST",
7032            "/memory",
7033            Some(json!({ "content": "http harness test" })),
7034        ));
7035        assert_eq!(status, StatusCode::OK);
7036        let mid = body.get("memory_id").and_then(|v| v.as_str()).unwrap();
7037        assert_eq!(mid.len(), 36, "uuid length");
7038        h.shutdown(&runtime);
7039    }
7040
7041    #[test]
7042    fn update_memory_rewrites_content_and_inspect_sees_it() {
7043        let runtime = rt();
7044        let h = Harness::new(&runtime);
7045        let r = h.router.clone();
7046        let (status, body) = runtime.block_on(call(
7047            r.clone(),
7048            "POST",
7049            "/memory",
7050            Some(json!({ "content": "old transport memory" })),
7051        ));
7052        assert_eq!(status, StatusCode::OK);
7053        let mid = body
7054            .get("memory_id")
7055            .and_then(|v| v.as_str())
7056            .expect("memory_id")
7057            .to_string();
7058
7059        let (status, body) = runtime.block_on(call(
7060            r.clone(),
7061            "PATCH",
7062            &format!("/memory/{mid}"),
7063            Some(json!({ "content": "new transport memory" })),
7064        ));
7065        assert_eq!(status, StatusCode::OK, "update failed: {body}");
7066        assert_eq!(
7067            body.get("content").and_then(|v| v.as_str()),
7068            Some("new transport memory")
7069        );
7070
7071        let (status, body) = runtime.block_on(call(r, "GET", &format!("/memory/{mid}"), None));
7072        assert_eq!(status, StatusCode::OK);
7073        assert_eq!(
7074            body.get("content").and_then(|v| v.as_str()),
7075            Some("new transport memory")
7076        );
7077        h.shutdown(&runtime);
7078    }
7079
7080    #[test]
7081    fn empty_content_returns_400() {
7082        let runtime = rt();
7083        let h = Harness::new(&runtime);
7084        let r = h.router.clone();
7085        let (status, body) =
7086            runtime.block_on(call(r, "POST", "/memory", Some(json!({ "content": "" }))));
7087        assert_eq!(status, StatusCode::BAD_REQUEST);
7088        assert!(
7089            body.get("error")
7090                .and_then(|e| e.as_str())
7091                .map(|s| s.contains("must not be empty"))
7092                .unwrap_or(false),
7093            "got: {body}"
7094        );
7095        h.shutdown(&runtime);
7096    }
7097
7098    #[test]
7099    fn empty_query_returns_400() {
7100        let runtime = rt();
7101        let h = Harness::new(&runtime);
7102        let r = h.router.clone();
7103        let (status, body) = runtime.block_on(call(
7104            r,
7105            "POST",
7106            "/memory/search",
7107            Some(json!({ "query": "" })),
7108        ));
7109        assert_eq!(status, StatusCode::BAD_REQUEST);
7110        assert!(
7111            body.get("error")
7112                .and_then(|e| e.as_str())
7113                .map(|s| s.contains("must not be empty"))
7114                .unwrap_or(false),
7115            "got: {body}"
7116        );
7117        h.shutdown(&runtime);
7118    }
7119
7120    #[test]
7121    fn inspect_unknown_returns_404() {
7122        let runtime = rt();
7123        let h = Harness::new(&runtime);
7124        let r = h.router.clone();
7125        let (status, body) = runtime.block_on(call(
7126            r,
7127            "GET",
7128            "/memory/00000000-0000-7000-8000-000000000000",
7129            None,
7130        ));
7131        assert_eq!(status, StatusCode::NOT_FOUND);
7132        assert!(body.get("error").is_some(), "got: {body}");
7133        h.shutdown(&runtime);
7134    }
7135
7136    #[test]
7137    fn inspect_invalid_id_returns_400() {
7138        let runtime = rt();
7139        let h = Harness::new(&runtime);
7140        let r = h.router.clone();
7141        let (status, _body) = runtime.block_on(call(r, "GET", "/memory/not-a-uuid", None));
7142        assert_eq!(status, StatusCode::BAD_REQUEST);
7143        h.shutdown(&runtime);
7144    }
7145
7146    #[test]
7147    fn forget_unknown_returns_404() {
7148        let runtime = rt();
7149        let h = Harness::new(&runtime);
7150        let r = h.router.clone();
7151        let (status, _body) = runtime.block_on(call(
7152            r,
7153            "DELETE",
7154            "/memory/00000000-0000-7000-8000-000000000000",
7155            None,
7156        ));
7157        assert_eq!(status, StatusCode::NOT_FOUND);
7158        h.shutdown(&runtime);
7159    }
7160
7161    /// `POST /memory/consolidate` runs the cluster pass and returns
7162    /// the report as JSON. With an empty body, `ConsolidationScope`
7163    /// defaults to unbounded; with a non-empty body, the
7164    /// `window_days` field is honored. The Harness's writer is
7165    /// spawned without a Steward, so `abstractions_built` stays 0
7166    /// even when `clusters_built` is nonzero — same posture as the
7167    /// daemon today.
7168    #[test]
7169    fn consolidate_endpoint_returns_report() {
7170        let runtime = rt();
7171        let h = Harness::new(&runtime);
7172        let r = h.router.clone();
7173        runtime.block_on(async move {
7174            // Empty DB → all-zero report; structural assertion only.
7175            let (status, body) = call(r.clone(), "POST", "/memory/consolidate", None).await;
7176            assert_eq!(status, StatusCode::OK);
7177            for field in [
7178                "episodes_seen",
7179                "clusters_built",
7180                "episodes_clustered",
7181                "abstractions_built",
7182                "triples_built",
7183                "contradictions_found",
7184            ] {
7185                assert!(
7186                    body.get(field).and_then(|v| v.as_u64()).is_some(),
7187                    "missing field {field}: {body}"
7188                );
7189            }
7190            assert_eq!(body["episodes_seen"], 0);
7191            assert_eq!(body["clusters_built"], 0);
7192
7193            // Non-empty body with window_days → still 200; unmistakable
7194            // shape round-trips through ConsolidationScope's serde.
7195            let (status2, _body2) = call(
7196                r,
7197                "POST",
7198                "/memory/consolidate",
7199                Some(json!({ "window_days": 7 })),
7200            )
7201            .await;
7202            assert_eq!(status2, StatusCode::OK);
7203        });
7204        h.shutdown(&runtime);
7205    }
7206
7207    #[test]
7208    fn auth_required_routes_reject_missing_token() {
7209        let runtime = rt();
7210        let h = Harness::new_with_auth(&runtime, Some("secret-xyz".into()));
7211        let r = h.router.clone();
7212        runtime.block_on(async move {
7213            // No Authorization header → 401.
7214            let (status, _body) = call(
7215                r.clone(),
7216                "POST",
7217                "/memory",
7218                Some(json!({ "content": "x" })),
7219            )
7220            .await;
7221            assert_eq!(status, StatusCode::UNAUTHORIZED);
7222
7223            // Wrong token → 401.
7224            let (status, _body) = call_with_auth(
7225                r.clone(),
7226                "POST",
7227                "/memory",
7228                Some(json!({ "content": "x" })),
7229                Some("Bearer wrong-token"),
7230            )
7231            .await;
7232            assert_eq!(status, StatusCode::UNAUTHORIZED);
7233
7234            // Correct token → handler runs (200).
7235            let (status, body) = call_with_auth(
7236                r.clone(),
7237                "POST",
7238                "/memory",
7239                Some(json!({ "content": "authed" })),
7240                Some("Bearer secret-xyz"),
7241            )
7242            .await;
7243            assert_eq!(status, StatusCode::OK);
7244            assert!(body.get("memory_id").is_some());
7245        });
7246        h.shutdown(&runtime);
7247    }
7248
7249    #[test]
7250    fn health_endpoint_does_not_require_auth() {
7251        let runtime = rt();
7252        let h = Harness::new_with_auth(&runtime, Some("secret".into()));
7253        let r = h.router.clone();
7254        let (status, _body) = runtime.block_on(call(r, "GET", "/health", None));
7255        // Liveness probes should work without credentials.
7256        assert_eq!(status, StatusCode::OK);
7257        h.shutdown(&runtime);
7258    }
7259
7260    #[test]
7261    fn auth_response_includes_www_authenticate_header() {
7262        // Verify the WWW-Authenticate hint that lets a well-behaved
7263        // client know it's a bearer-auth scheme. We check via raw
7264        // request → response (oneshot returns Response, but our
7265        // call() helper drops the headers; build the request manually).
7266        let runtime = rt();
7267        let h = Harness::new_with_auth(&runtime, Some("secret".into()));
7268        let r = h.router.clone();
7269        runtime.block_on(async move {
7270            let req = Request::builder()
7271                .method("POST")
7272                .uri("/memory")
7273                .header("content-type", "application/json")
7274                .body(Body::from(
7275                    serde_json::to_vec(&json!({ "content": "x" })).unwrap(),
7276                ))
7277                .unwrap();
7278            let resp = r.oneshot(req).await.unwrap();
7279            assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
7280            let www = resp
7281                .headers()
7282                .get("www-authenticate")
7283                .and_then(|v| v.to_str().ok())
7284                .unwrap_or("");
7285            assert!(
7286                www.starts_with("Bearer"),
7287                "expected WWW-Authenticate: Bearer..., got: {www}"
7288            );
7289        });
7290        h.shutdown(&runtime);
7291    }
7292
7293    // ---------------------------------------------------------------------
7294    // v0.8.0 P3: OIDC end-to-end. Spin up a fake IdP (wiremock) that
7295    // serves an OIDC discovery doc + JWKS, mint a token claiming
7296    // `solo_tenant = "default"`, and verify it routes through the
7297    // middleware + TenantExtractor + handler.
7298    // ---------------------------------------------------------------------
7299
7300    fn base64_url_for_test(bytes: &[u8]) -> String {
7301        use base64::Engine;
7302        base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(bytes)
7303    }
7304
7305    /// Spin up a single-purpose fake OIDC IdP for these tests. Returns
7306    /// (mock_server, discovery_url, secret, kid).
7307    async fn spin_fake_idp() -> (wiremock::MockServer, String, Vec<u8>, &'static str) {
7308        use wiremock::matchers::{method, path};
7309        use wiremock::{Mock, MockServer, ResponseTemplate};
7310        let server = MockServer::start().await;
7311        let secret = b"http-test-secret-for-hmac-fixture".to_vec();
7312        let kid = "http-test-kid";
7313        let discovery = serde_json::json!({
7314            "issuer": server.uri(),
7315            "jwks_uri": format!("{}/jwks", server.uri()),
7316        });
7317        Mock::given(method("GET"))
7318            .and(path("/.well-known/openid-configuration"))
7319            .respond_with(ResponseTemplate::new(200).set_body_json(discovery))
7320            .mount(&server)
7321            .await;
7322        let jwks = serde_json::json!({
7323            "keys": [
7324                {
7325                    "kty": "oct",
7326                    "kid": kid,
7327                    "alg": "HS256",
7328                    "k": base64_url_for_test(&secret),
7329                }
7330            ]
7331        });
7332        Mock::given(method("GET"))
7333            .and(path("/jwks"))
7334            .respond_with(ResponseTemplate::new(200).set_body_json(jwks))
7335            .mount(&server)
7336            .await;
7337        let discovery_url = format!("{}/.well-known/openid-configuration", server.uri());
7338        (server, discovery_url, secret, kid)
7339    }
7340
7341    fn mint_idp_token(
7342        server_uri: &str,
7343        kid: &str,
7344        secret: &[u8],
7345        tenant_claim: &str,
7346        audience: &str,
7347    ) -> String {
7348        use jsonwebtoken::{Algorithm, EncodingKey, Header};
7349        let mut header = Header::new(Algorithm::HS256);
7350        header.kid = Some(kid.to_string());
7351        let now = std::time::SystemTime::now()
7352            .duration_since(std::time::UNIX_EPOCH)
7353            .unwrap()
7354            .as_secs();
7355        let claims = serde_json::json!({
7356            "iss": server_uri,
7357            "sub": "test-user-1",
7358            "aud": audience,
7359            "exp": now + 600,
7360            "iat": now,
7361            "solo_tenant": tenant_claim,
7362        });
7363        jsonwebtoken::encode(&header, &claims, &EncodingKey::from_secret(secret))
7364            .expect("mint token")
7365    }
7366
7367    #[test]
7368    fn http_oidc_accept_resolves_to_tenant_from_claim() {
7369        let runtime = rt();
7370        let (fake_server, discovery_url, secret, kid) =
7371            runtime.block_on(async { spin_fake_idp().await });
7372        let server_uri = fake_server.uri();
7373        // Keep the wiremock server alive for the duration of this test.
7374        let _server_guard = fake_server;
7375
7376        let auth = crate::auth::AuthConfig::Oidc {
7377            discovery_url,
7378            audience: "test-audience".to_string(),
7379            tenant_claim_name: "solo_tenant".to_string(),
7380        };
7381        let h = Harness::new_with_auth_config(&runtime, Some(auth));
7382        let r = h.router.clone();
7383
7384        // Mint a token claiming the harness's default tenant.
7385        let token = mint_idp_token(&server_uri, kid, &secret, "default", "test-audience");
7386
7387        runtime.block_on(async move {
7388            // POST /memory with a valid OIDC token → handler runs, returns memory_id.
7389            let (status, body) = call_with_auth(
7390                r.clone(),
7391                "POST",
7392                "/memory",
7393                Some(json!({ "content": "oidc-routed content" })),
7394                Some(&format!("Bearer {token}")),
7395            )
7396            .await;
7397            assert_eq!(status, StatusCode::OK, "got body: {body}");
7398            assert!(body.get("memory_id").is_some(), "no memory_id in {body}");
7399        });
7400        h.shutdown(&runtime);
7401    }
7402
7403    #[test]
7404    fn http_oidc_reject_missing_token_returns_401() {
7405        let runtime = rt();
7406        let (fake_server, discovery_url, _secret, _kid) =
7407            runtime.block_on(async { spin_fake_idp().await });
7408        let _server_guard = fake_server;
7409        let auth = crate::auth::AuthConfig::Oidc {
7410            discovery_url,
7411            audience: "test-audience".to_string(),
7412            tenant_claim_name: "solo_tenant".to_string(),
7413        };
7414        let h = Harness::new_with_auth_config(&runtime, Some(auth));
7415        let r = h.router.clone();
7416        runtime.block_on(async move {
7417            // No Authorization header.
7418            let (status, _body) = call(
7419                r.clone(),
7420                "POST",
7421                "/memory",
7422                Some(json!({ "content": "x" })),
7423            )
7424            .await;
7425            assert_eq!(status, StatusCode::UNAUTHORIZED);
7426
7427            // Garbage token → 401 (invalid signature / not a JWT).
7428            let (status, _body) = call_with_auth(
7429                r.clone(),
7430                "POST",
7431                "/memory",
7432                Some(json!({ "content": "x" })),
7433                Some("Bearer not-a-real-jwt"),
7434            )
7435            .await;
7436            assert_eq!(status, StatusCode::UNAUTHORIZED);
7437        });
7438        h.shutdown(&runtime);
7439    }
7440
7441    #[test]
7442    fn full_remember_recall_inspect_forget_round_trip() {
7443        let runtime = rt();
7444        let h = Harness::new(&runtime);
7445        let r = h.router.clone();
7446        runtime.block_on(async move {
7447            // POST /memory
7448            let (status, body) = call(
7449                r.clone(),
7450                "POST",
7451                "/memory",
7452                Some(json!({ "content": "round-trip content" })),
7453            )
7454            .await;
7455            assert_eq!(status, StatusCode::OK);
7456            let mid = body
7457                .get("memory_id")
7458                .and_then(|v| v.as_str())
7459                .unwrap()
7460                .to_string();
7461
7462            // POST /memory/search — exact-match (StubEmbedder) returns the row.
7463            let (status, body) = call(
7464                r.clone(),
7465                "POST",
7466                "/memory/search",
7467                Some(json!({ "query": "round-trip content", "limit": 5 })),
7468            )
7469            .await;
7470            assert_eq!(status, StatusCode::OK);
7471            assert!(
7472                body.get("candidates_considered")
7473                    .and_then(|v| v.as_u64())
7474                    .is_some_and(|n| n >= 1),
7475                "recall should expose pre-filter candidate diagnostics: {body}"
7476            );
7477            let hits = body.get("hits").and_then(|v| v.as_array()).unwrap();
7478            assert!(
7479                hits.iter().any(
7480                    |h| h.get("content").and_then(|c| c.as_str()) == Some("round-trip content")
7481                ),
7482                "expected hit with content; got: {body}"
7483            );
7484
7485            // GET /memory/{id}
7486            let (status, body) = call(r.clone(), "GET", &format!("/memory/{mid}"), None).await;
7487            assert_eq!(status, StatusCode::OK);
7488            assert_eq!(body.get("status").and_then(|v| v.as_str()), Some("active"));
7489
7490            // DELETE /memory/{id}
7491            let (status, _body) = call(r.clone(), "DELETE", &format!("/memory/{mid}"), None).await;
7492            assert_eq!(status, StatusCode::NO_CONTENT);
7493
7494            // GET again — still readable but status='forgotten'
7495            let (status, body) = call(r.clone(), "GET", &format!("/memory/{mid}"), None).await;
7496            assert_eq!(status, StatusCode::OK);
7497            assert_eq!(
7498                body.get("status").and_then(|v| v.as_str()),
7499                Some("forgotten")
7500            );
7501
7502            // POST /memory/search — forgotten row excluded.
7503            let (status, body) = call(
7504                r.clone(),
7505                "POST",
7506                "/memory/search",
7507                Some(json!({ "query": "round-trip content", "limit": 5 })),
7508            )
7509            .await;
7510            assert_eq!(status, StatusCode::OK);
7511            let hits = body.get("hits").and_then(|v| v.as_array()).unwrap();
7512            assert!(
7513                hits.iter()
7514                    .all(|h| h.get("memory_id").and_then(|m| m.as_str()) != Some(mid.as_str())),
7515                "forgotten row should be excluded from recall: {body}"
7516            );
7517        });
7518        h.shutdown(&runtime);
7519    }
7520
7521    #[test]
7522    fn memory_context_endpoint_returns_bundle() {
7523        let runtime = rt();
7524        let h = Harness::new(&runtime);
7525        let r = h.router.clone();
7526        runtime.block_on(async move {
7527            let (status, _body) = call(
7528                r.clone(),
7529                "POST",
7530                "/memory",
7531                Some(json!({ "content": "http memory context needle" })),
7532            )
7533            .await;
7534            assert_eq!(status, StatusCode::OK);
7535
7536            let (status, body) = call(
7537                r,
7538                "POST",
7539                "/memory/context",
7540                Some(json!({ "query": "memory context needle", "limit": 5 })),
7541            )
7542            .await;
7543            assert_eq!(status, StatusCode::OK);
7544            assert_eq!(
7545                body.get("query").and_then(|v| v.as_str()),
7546                Some("memory context needle")
7547            );
7548            let hits = body
7549                .pointer("/recall/hits")
7550                .and_then(|v| v.as_array())
7551                .unwrap_or_else(|| panic!("missing /recall/hits: {body}"));
7552            assert!(
7553                hits.iter()
7554                    .any(|h| h.get("content").and_then(|c| c.as_str())
7555                        == Some("http memory context needle")),
7556                "expected context recall hit: {body}"
7557            );
7558            assert!(body.get("themes").is_some_and(|v| v.is_array()));
7559            assert!(body.get("facts").is_some_and(|v| v.is_array()));
7560            assert!(body.get("contradictions").is_some_and(|v| v.is_array()));
7561        });
7562        h.shutdown(&runtime);
7563    }
7564
    // Path 1 derived-layer endpoint tests (v0.4.0+). Wire-path only —
    // content correctness is covered by `solo_query::derived`'s own
    // tests (Sub-task A). These verify the HTTP shape: GET routing,
    // query-string parameter parsing, JSON-array response bodies, and
    // validation 400s for invalid inputs.
7570
7571    #[test]
7572    fn themes_endpoint_returns_empty_array_on_empty_db() {
7573        let runtime = rt();
7574        let h = Harness::new(&runtime);
7575        let r = h.router.clone();
7576        let (status, body) = runtime.block_on(call(r, "GET", "/memory/themes", None));
7577        assert_eq!(status, StatusCode::OK);
7578        assert!(body.is_array(), "expected array, got {body}");
7579        assert_eq!(body.as_array().unwrap().len(), 0);
7580        h.shutdown(&runtime);
7581    }
7582
7583    #[test]
7584    fn themes_endpoint_passes_through_query_params() {
7585        let runtime = rt();
7586        let h = Harness::new(&runtime);
7587        let r = h.router.clone();
7588        let (status, body) = runtime.block_on(call(
7589            r,
7590            "GET",
7591            "/memory/themes?window_days=7&limit=20",
7592            None,
7593        ));
7594        assert_eq!(status, StatusCode::OK);
7595        assert!(body.is_array(), "expected array, got {body}");
7596        h.shutdown(&runtime);
7597    }
7598
7599    #[test]
7600    fn facts_about_endpoint_requires_subject() {
7601        let runtime = rt();
7602        let h = Harness::new(&runtime);
7603        let r = h.router.clone();
7604        // Missing subject — axum's Query extractor 422 (Unprocessable
7605        // Entity) on missing required field; some axum versions
7606        // surface as 400. Accept either.
7607        let (status, _body) = runtime.block_on(call(r, "GET", "/memory/facts_about", None));
7608        assert!(
7609            status == StatusCode::BAD_REQUEST || status == StatusCode::UNPROCESSABLE_ENTITY,
7610            "expected 400 or 422 for missing subject, got {status}"
7611        );
7612        h.shutdown(&runtime);
7613    }
7614
7615    #[test]
7616    fn facts_about_endpoint_rejects_blank_subject() {
7617        let runtime = rt();
7618        let h = Harness::new(&runtime);
7619        let r = h.router.clone();
7620        // Whitespace-only subject reaches the handler then trips its
7621        // own validation → ApiError::bad_request → 400.
7622        let (status, body) =
7623            runtime.block_on(call(r, "GET", "/memory/facts_about?subject=%20%20", None));
7624        assert_eq!(status, StatusCode::BAD_REQUEST);
7625        assert!(
7626            body.get("error")
7627                .and_then(|v| v.as_str())
7628                .is_some_and(|s| s.contains("subject")),
7629            "expected error mentioning subject, got {body}"
7630        );
7631        h.shutdown(&runtime);
7632    }
7633
7634    #[test]
7635    fn facts_about_endpoint_returns_empty_array_for_unknown_subject() {
7636        let runtime = rt();
7637        let h = Harness::new(&runtime);
7638        let r = h.router.clone();
7639        let (status, body) = runtime.block_on(call(
7640            r,
7641            "GET",
7642            "/memory/facts_about?subject=NobodyKnows",
7643            None,
7644        ));
7645        assert_eq!(status, StatusCode::OK);
7646        assert_eq!(body.as_array().unwrap().len(), 0);
7647        h.shutdown(&runtime);
7648    }
7649
7650    #[test]
7651    fn facts_about_endpoint_parses_include_as_object_query_param() {
7652        // v0.5.1 P8: `?include_as_object=true` must parse cleanly
7653        // through the `Query<FactsAboutQuery>` extractor. If the
7654        // struct field is missing or wrongly typed, axum returns
7655        // 400/422 before reaching the handler. We don't seed
7656        // triples; we only need the request to reach the handler
7657        // and produce a normal 200 + empty array. Mirrors
7658        // `inspect_cluster_endpoint_passes_full_content_query_param`.
7659        let runtime = rt();
7660        let h = Harness::new(&runtime);
7661        let r = h.router.clone();
7662        let (status, body) = runtime.block_on(call(
7663            r,
7664            "GET",
7665            "/memory/facts_about?subject=Maya&include_as_object=true",
7666            None,
7667        ));
7668        assert_eq!(
7669            status,
7670            StatusCode::OK,
7671            "expected 200 with include_as_object query param, got {status}"
7672        );
7673        assert!(body.is_array());
7674        h.shutdown(&runtime);
7675    }
7676
7677    #[test]
7678    fn entities_endpoint_returns_matching_graph_entities() {
7679        let runtime = rt();
7680        let h = Harness::new(&runtime);
7681        {
7682            let conn = h.open_db();
7683            let memory_id = MemoryId::new().to_string();
7684            let rowid = seed_episode(&conn, &memory_id, 100, "Alice works with graph transport");
7685            seed_triple_row(
7686                &conn,
7687                "t-http-entity-1",
7688                "Alice",
7689                "knows",
7690                "Bob",
7691                Some(rowid),
7692            );
7693            seed_triple_row(
7694                &conn,
7695                "t-http-entity-2",
7696                "Alicia",
7697                "works_at",
7698                "Solo",
7699                Some(rowid),
7700            );
7701        }
7702
7703        let r = h.router.clone();
7704        let (status, body) =
7705            runtime.block_on(call(r, "GET", "/memory/entities?query=Ali&limit=5", None));
7706        assert_eq!(status, StatusCode::OK);
7707        let arr = body.as_array().expect("entities array");
7708        assert!(
7709            arr.iter()
7710                .any(|v| v.get("entity_id").and_then(|id| id.as_str()) == Some("Alice")),
7711            "expected Alice entity, got {body}"
7712        );
7713        h.shutdown(&runtime);
7714    }
7715
7716    #[test]
7717    fn inspect_cluster_endpoint_unknown_id_returns_404() {
7718        // Maps `Error::NotFound` from `solo_query::inspect_cluster`
7719        // through `ApiError::from` → 404. Mirrors the unknown-memory
7720        // case for `GET /memory/{id}`.
7721        let runtime = rt();
7722        let h = Harness::new(&runtime);
7723        let r = h.router.clone();
7724        let (status, body) =
7725            runtime.block_on(call(r, "GET", "/memory/clusters/no-such-cluster", None));
7726        assert_eq!(status, StatusCode::NOT_FOUND);
7727        assert!(
7728            body.get("error")
7729                .and_then(|v| v.as_str())
7730                .is_some_and(|s| s.contains("no-such-cluster")),
7731            "expected error mentioning cluster id, got {body}"
7732        );
7733        h.shutdown(&runtime);
7734    }
7735
7736    #[test]
7737    fn inspect_cluster_endpoint_passes_full_content_query_param() {
7738        // Even with no matching cluster (→ 404), the request must
7739        // reach the handler — proves the `?full_content=true` query
7740        // string parses cleanly (Query<InspectClusterQuery>::default
7741        // path didn't choke). If we accidentally fail at the extractor
7742        // we'd get a 400/422, not the expected 404.
7743        let runtime = rt();
7744        let h = Harness::new(&runtime);
7745        let r = h.router.clone();
7746        let (status, _body) = runtime.block_on(call(
7747            r,
7748            "GET",
7749            "/memory/clusters/missing?full_content=true",
7750            None,
7751        ));
7752        assert_eq!(status, StatusCode::NOT_FOUND);
7753        h.shutdown(&runtime);
7754    }
7755
7756    #[test]
7757    fn contradictions_endpoint_returns_empty_array_on_empty_db() {
7758        let runtime = rt();
7759        let h = Harness::new(&runtime);
7760        let r = h.router.clone();
7761        let (status, body) = runtime.block_on(call(r, "GET", "/memory/contradictions", None));
7762        assert_eq!(status, StatusCode::OK);
7763        assert!(body.is_array());
7764        assert_eq!(body.as_array().unwrap().len(), 0);
7765        h.shutdown(&runtime);
7766    }
7767
7768    #[test]
7769    fn contradiction_resolve_endpoint_updates_lifecycle() {
7770        let runtime = rt();
7771        let h = Harness::new(&runtime);
7772        {
7773            let conn = h.open_db();
7774            let memory_id = MemoryId::new().to_string();
7775            let rowid = seed_episode(&conn, &memory_id, 100, "contradiction source");
7776            seed_triple_row(&conn, "t-http-a", "Alice", "likes", "tea", Some(rowid));
7777            seed_triple_row(&conn, "t-http-b", "Alice", "likes", "coffee", Some(rowid));
7778            seed_contradiction_row(&conn, "t-http-a", "t-http-b", "other");
7779        }
7780
7781        let r = h.router.clone();
7782        let (status, body) = runtime.block_on(call(
7783            r.clone(),
7784            "POST",
7785            "/memory/contradictions/resolve",
7786            Some(json!({
7787                "a_id": "t-http-a",
7788                "b_id": "t-http-b",
7789                "kind": "other",
7790                "resolution_note": "tea is current",
7791                "winning_triple_id": "t-http-a"
7792            })),
7793        ));
7794        assert_eq!(status, StatusCode::OK, "resolve failed: {body}");
7795        assert_eq!(
7796            body.get("status").and_then(|v| v.as_str()),
7797            Some("resolved")
7798        );
7799        assert!(
7800            body.get("resolved_at_ms")
7801                .and_then(|v| v.as_i64())
7802                .is_some()
7803        );
7804
7805        let (status, body) = runtime.block_on(call(r, "GET", "/memory/contradictions", None));
7806        assert_eq!(status, StatusCode::OK);
7807        assert_eq!(
7808            body.pointer("/0/status").and_then(|v| v.as_str()),
7809            Some("resolved")
7810        );
7811        h.shutdown(&runtime);
7812    }
7813
7814    #[test]
7815    fn derived_endpoints_require_bearer_when_auth_enabled() {
7816        let runtime = rt();
7817        let h = Harness::new_with_auth(&runtime, Some("secret-token".to_string()));
7818        // Each of the three new endpoints should reject missing token.
7819        // Per the existing tests' shutdown-timing comment: don't hold a
7820        // long-lived router clone across multiple iterations — drop the
7821        // clone before each subsequent oneshot, and don't keep a `let r =
7822        // h.router.clone()` alive across h.shutdown(). Re-clone per
7823        // iteration; the per-call clone is consumed by oneshot.
7824        for path in [
7825            "/memory/themes",
7826            "/memory/facts_about?subject=Sam",
7827            "/memory/entities?query=Sam",
7828            "/memory/contradictions",
7829            "/memory/clusters/any-id",
7830        ] {
7831            let (status, _) = runtime.block_on(call(h.router.clone(), "GET", path, None));
7832            assert_eq!(
7833                status,
7834                StatusCode::UNAUTHORIZED,
7835                "{path} should 401 without token"
7836            );
7837        }
7838        h.shutdown(&runtime);
7839    }
7840
7841    // ---- Document endpoints (v0.7.0 P6) ----
7842    //
7843    // Wire-path coverage. The `Harness` here uses
7844    // `WriterActor::spawn_full` without an embedder — same shape as the
7845    // existing handler tests. Ingest/search would fail at the writer
7846    // boundary with "writer has no embedder", but every other path
7847    // (404s, malformed ids, route shape, bearer auth gating, OpenAPI
7848    // documentation) is exercisable. Real end-to-end ingest→search
7849    // round-trip lives in `mcp_smoke.rs` where a real subprocess runs
7850    // with a fully-wired writer.
7851
7852    #[test]
7853    fn list_documents_endpoint_returns_empty_array_on_empty_db() {
7854        let runtime = rt();
7855        let h = Harness::new(&runtime);
7856        let r = h.router.clone();
7857        let (status, body) = runtime.block_on(call(r, "GET", "/memory/documents", None));
7858        assert_eq!(status, StatusCode::OK);
7859        assert!(body.is_array(), "expected array, got {body}");
7860        assert_eq!(body.as_array().unwrap().len(), 0);
7861        h.shutdown(&runtime);
7862    }
7863
7864    #[test]
7865    fn list_documents_endpoint_parses_query_params() {
7866        let runtime = rt();
7867        let h = Harness::new(&runtime);
7868        let r = h.router.clone();
7869        let (status, body) = runtime.block_on(call(
7870            r,
7871            "GET",
7872            "/memory/documents?limit=5&offset=0&include_forgotten=true",
7873            None,
7874        ));
7875        assert_eq!(status, StatusCode::OK);
7876        assert!(body.is_array());
7877        h.shutdown(&runtime);
7878    }
7879
7880    #[test]
7881    fn ingest_document_endpoint_rejects_empty_path() {
7882        let runtime = rt();
7883        let h = Harness::new(&runtime);
7884        let r = h.router.clone();
7885        let (status, body) = runtime.block_on(call(
7886            r,
7887            "POST",
7888            "/memory/documents",
7889            Some(json!({ "path": "" })),
7890        ));
7891        assert_eq!(status, StatusCode::BAD_REQUEST);
7892        assert!(
7893            body.get("error")
7894                .and_then(|v| v.as_str())
7895                .is_some_and(|s| s.contains("path")),
7896            "expected error mentioning path, got {body}"
7897        );
7898        h.shutdown(&runtime);
7899    }
7900
7901    #[test]
7902    fn search_docs_endpoint_rejects_empty_query() {
7903        let runtime = rt();
7904        let h = Harness::new(&runtime);
7905        let r = h.router.clone();
7906        let (status, body) = runtime.block_on(call(
7907            r,
7908            "POST",
7909            "/memory/documents/search",
7910            Some(json!({ "query": "   " })),
7911        ));
7912        assert_eq!(status, StatusCode::BAD_REQUEST);
7913        assert!(
7914            body.get("error")
7915                .and_then(|v| v.as_str())
7916                .is_some_and(|s| s.contains("must not be empty") || s.contains("doc_search")),
7917            "expected error mentioning empty query, got {body}"
7918        );
7919        h.shutdown(&runtime);
7920    }
7921
7922    #[test]
7923    fn inspect_document_endpoint_unknown_id_returns_404() {
7924        let runtime = rt();
7925        let h = Harness::new(&runtime);
7926        let r = h.router.clone();
7927        let (status, body) = runtime.block_on(call(
7928            r,
7929            "GET",
7930            "/memory/documents/00000000-0000-7000-8000-000000000000",
7931            None,
7932        ));
7933        assert_eq!(status, StatusCode::NOT_FOUND);
7934        assert!(body.get("error").is_some(), "got: {body}");
7935        h.shutdown(&runtime);
7936    }
7937
7938    #[test]
7939    fn inspect_document_endpoint_rejects_malformed_id() {
7940        let runtime = rt();
7941        let h = Harness::new(&runtime);
7942        let r = h.router.clone();
7943        let (status, _body) =
7944            runtime.block_on(call(r, "GET", "/memory/documents/not-a-uuid", None));
7945        assert_eq!(status, StatusCode::BAD_REQUEST);
7946        h.shutdown(&runtime);
7947    }
7948
7949    #[test]
7950    fn forget_document_endpoint_unknown_id_returns_404() {
7951        // Valid UUID format; no row exists → writer's `forget_document`
7952        // returns Error::NotFound → mapped to 404 by `ApiError::from`.
7953        let runtime = rt();
7954        let h = Harness::new(&runtime);
7955        let r = h.router.clone();
7956        let (status, _body) = runtime.block_on(call(
7957            r,
7958            "DELETE",
7959            "/memory/documents/00000000-0000-7000-8000-000000000000",
7960            None,
7961        ));
7962        assert_eq!(status, StatusCode::NOT_FOUND);
7963        h.shutdown(&runtime);
7964    }
7965
7966    #[test]
7967    fn forget_document_endpoint_rejects_malformed_id() {
7968        let runtime = rt();
7969        let h = Harness::new(&runtime);
7970        let r = h.router.clone();
7971        let (status, _body) =
7972            runtime.block_on(call(r, "DELETE", "/memory/documents/not-a-uuid", None));
7973        assert_eq!(status, StatusCode::BAD_REQUEST);
7974        h.shutdown(&runtime);
7975    }
7976
7977    #[test]
7978    fn document_endpoints_require_bearer_when_auth_enabled() {
7979        // All five doc endpoints sit behind the same authed Router and
7980        // must 401 without the bearer token. Mirrors
7981        // `derived_endpoints_require_bearer_when_auth_enabled`.
7982        let runtime = rt();
7983        let h = Harness::new_with_auth(&runtime, Some("doc-secret".to_string()));
7984        let cases: &[(&str, &str, Option<Value>)] = &[
7985            ("POST", "/memory/documents", Some(json!({ "path": "/x" }))),
7986            ("GET", "/memory/documents", None),
7987            (
7988                "POST",
7989                "/memory/documents/search",
7990                Some(json!({ "query": "x" })),
7991            ),
7992            (
7993                "GET",
7994                "/memory/documents/00000000-0000-7000-8000-000000000000",
7995                None,
7996            ),
7997            (
7998                "DELETE",
7999                "/memory/documents/00000000-0000-7000-8000-000000000000",
8000                None,
8001            ),
8002        ];
8003        for (method, path, body) in cases {
8004            let (status, _) = runtime.block_on(call(h.router.clone(), method, path, body.clone()));
8005            assert_eq!(
8006                status,
8007                StatusCode::UNAUTHORIZED,
8008                "{method} {path} should 401 without token"
8009            );
8010        }
8011        h.shutdown(&runtime);
8012    }
8013
8014    #[test]
8015    fn document_endpoints_accept_correct_bearer_token() {
8016        // Sanity check: with the right token, the same five endpoints
8017        // pass auth and reach the handler. We only assert that the
8018        // status code is NOT 401 — exact downstream behaviour depends
8019        // on the harness (no embedder → ingest/search would 500; empty
8020        // DB → list/inspect/forget return 200/404).
8021        let runtime = rt();
8022        let h = Harness::new_with_auth(&runtime, Some("doc-secret".to_string()));
8023        runtime.block_on(async {
8024            // GET /memory/documents → 200 + empty array (auth passes).
8025            let (status, _) = call_with_auth(
8026                h.router.clone(),
8027                "GET",
8028                "/memory/documents",
8029                None,
8030                Some("Bearer doc-secret"),
8031            )
8032            .await;
8033            assert_eq!(status, StatusCode::OK);
8034
8035            // GET /memory/documents/<unknown> → 404 (auth passes).
8036            let (status, _) = call_with_auth(
8037                h.router.clone(),
8038                "GET",
8039                "/memory/documents/00000000-0000-7000-8000-000000000000",
8040                None,
8041                Some("Bearer doc-secret"),
8042            )
8043            .await;
8044            assert_eq!(status, StatusCode::NOT_FOUND);
8045        });
8046        h.shutdown(&runtime);
8047    }
8048
8049    // ---------------------------------------------------------------------
8050    // v0.8.0 P2: tenant header extractor tests
8051    // ---------------------------------------------------------------------
8052
8053    /// `X-Solo-Tenant: default` resolves to the default tenant (which
8054    /// in the test harness is the only one wired in the registry).
8055    #[test]
8056    fn tenant_header_default_resolves() {
8057        let runtime = rt();
8058        let h = Harness::new(&runtime);
8059        let r = h.router.clone();
8060        let (status, _body) = runtime.block_on(async {
8061            let req = Request::builder()
8062                .method("GET")
8063                .uri("/memory/00000000-0000-7000-8000-000000000000")
8064                .header("x-solo-tenant", "default")
8065                .body(Body::empty())
8066                .unwrap();
8067            let resp = r.oneshot(req).await.expect("oneshot");
8068            let s = resp.status();
8069            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8070            (s, _b)
8071        });
8072        // 404 because the id doesn't exist — but it's a routed 404 from
8073        // inspect_handler, not a 400 from a bad tenant header. That's
8074        // the proof point.
8075        assert_eq!(status, StatusCode::NOT_FOUND);
8076        h.shutdown(&runtime);
8077    }
8078
8079    /// `X-Solo-Tenant: UPPER` → 400 (invalid tenant id format).
8080    #[test]
8081    fn tenant_header_invalid_returns_400() {
8082        let runtime = rt();
8083        let h = Harness::new(&runtime);
8084        let r = h.router.clone();
8085        let (status, body) = runtime.block_on(async {
8086            let req = Request::builder()
8087                .method("GET")
8088                .uri("/memory/00000000-0000-7000-8000-000000000000")
8089                .header("x-solo-tenant", "UPPER")
8090                .body(Body::empty())
8091                .unwrap();
8092            let resp = r.oneshot(req).await.expect("oneshot");
8093            let s = resp.status();
8094            let bytes = resp.into_body().collect().await.unwrap().to_bytes();
8095            let v: Value = serde_json::from_slice(&bytes).unwrap_or(Value::Null);
8096            (s, v)
8097        });
8098        assert_eq!(status, StatusCode::BAD_REQUEST);
8099        let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("");
8100        assert!(
8101            msg.to_lowercase().contains("tenant") || msg.to_lowercase().contains("invalid"),
8102            "error must mention tenant/invalid: {msg}"
8103        );
8104        h.shutdown(&runtime);
8105    }
8106
8107    /// `X-Solo-Tenant: never-registered` → 404 (unknown tenant id).
8108    #[test]
8109    fn tenant_header_unknown_returns_404() {
8110        let runtime = rt();
8111        let h = Harness::new(&runtime);
8112        let r = h.router.clone();
8113        let (status, _body) = runtime.block_on(async {
8114            let req = Request::builder()
8115                .method("GET")
8116                .uri("/memory/00000000-0000-7000-8000-000000000000")
8117                .header("x-solo-tenant", "never-registered")
8118                .body(Body::empty())
8119                .unwrap();
8120            let resp = r.oneshot(req).await.expect("oneshot");
8121            let s = resp.status();
8122            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8123            (s, _b)
8124        });
8125        assert_eq!(status, StatusCode::NOT_FOUND);
8126        h.shutdown(&runtime);
8127    }
8128
8129    /// No `X-Solo-Tenant` header → falls back to state.default_tenant.
8130    /// The reach-through to `inspect_handler` should produce the normal
8131    /// 404 for an unknown id rather than a tenant-routing error.
8132    #[test]
8133    fn tenant_header_missing_defaults_to_state_default_tenant() {
8134        let runtime = rt();
8135        let h = Harness::new(&runtime);
8136        let r = h.router.clone();
8137        let (status, _body) = runtime.block_on(async {
8138            let req = Request::builder()
8139                .method("GET")
8140                .uri("/memory/00000000-0000-7000-8000-000000000000")
8141                .body(Body::empty())
8142                .unwrap();
8143            let resp = r.oneshot(req).await.expect("oneshot");
8144            let s = resp.status();
8145            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8146            (s, _b)
8147        });
8148        assert_eq!(status, StatusCode::NOT_FOUND);
8149        h.shutdown(&runtime);
8150    }
8151
8152    // ---------------------------------------------------------------------
8153    // v0.9.x: GET /v1/graph/expand
8154    //
8155    // Seeds tables directly via the Harness's side connection and walks
8156    // the four expansion kinds. The Harness is single-tenant (default);
8157    // the routing-isolation case is already covered by the
8158    // `tenant_header_*` tests above (an `X-Solo-Tenant: never-registered`
8159    // header against the same node_id surfaces 404 from the registry,
8160    // proving cross-tenant lookups can't bleed).
8161    // ---------------------------------------------------------------------
8162
8163    /// Insert one episode row directly. Returns its rowid for callers
8164    /// that need to wire `triples.source_episode_id`.
8165    fn seed_episode(
8166        conn: &rusqlite::Connection,
8167        memory_id: &str,
8168        ts_ms: i64,
8169        content: &str,
8170    ) -> i64 {
8171        conn.execute(
8172            "INSERT INTO episodes
8173                (memory_id, ts_ms, source_type, content,
8174                 encoding_context_json, tier, status,
8175                 confidence, strength, salience,
8176                 created_at_ms, updated_at_ms)
8177                VALUES (?1, ?2, 'user_message', ?3,
8178                        '{}', 'hot', 'active',
8179                        1.0, 0.5, 0.5, ?2, ?2)",
8180            rusqlite::params![memory_id, ts_ms, content],
8181        )
8182        .expect("seed episode");
8183        conn.last_insert_rowid()
8184    }
8185
8186    fn seed_cluster_row(conn: &rusqlite::Connection, cluster_id: &str, created_at_ms: i64) {
8187        conn.execute(
8188            "INSERT INTO clusters (cluster_id, coherence, created_at_ms)
8189                  VALUES (?1, 0.5, ?2)",
8190            rusqlite::params![cluster_id, created_at_ms],
8191        )
8192        .expect("seed cluster");
8193    }
8194
8195    fn seed_cluster_member(conn: &rusqlite::Connection, cluster_id: &str, memory_id: &str) {
8196        conn.execute(
8197            "INSERT INTO cluster_episodes (cluster_id, memory_id) VALUES (?1, ?2)",
8198            rusqlite::params![cluster_id, memory_id],
8199        )
8200        .expect("seed cluster_episodes");
8201    }
8202
8203    fn seed_document_row(conn: &rusqlite::Connection, doc_id: &str, title: &str) {
8204        conn.execute(
8205            "INSERT INTO documents
8206                (doc_id, source, title, mime_type, ingested_at_ms,
8207                 modified_at_ms, status, chunk_count, content_hash, byte_size)
8208                VALUES (?1, ?2, ?3, 'text/plain', 0, NULL,
8209                        'active', 0, ?1, NULL)",
8210            rusqlite::params![doc_id, format!("/tmp/{title}.txt"), title],
8211        )
8212        .expect("seed doc");
8213    }
8214
8215    fn seed_chunk_row(
8216        conn: &rusqlite::Connection,
8217        chunk_id: &str,
8218        doc_id: &str,
8219        chunk_index: i64,
8220        content: &str,
8221    ) {
8222        conn.execute(
8223            "INSERT INTO document_chunks
8224                (chunk_id, doc_id, chunk_index, content,
8225                 token_count, start_offset, end_offset, created_at_ms)
8226                VALUES (?1, ?2, ?3, ?4, 1, 0, ?5, 0)",
8227            rusqlite::params![chunk_id, doc_id, chunk_index, content, content.len() as i64],
8228        )
8229        .expect("seed chunk");
8230    }
8231
8232    fn seed_triple_row(
8233        conn: &rusqlite::Connection,
8234        triple_id: &str,
8235        subject: &str,
8236        predicate: &str,
8237        object: &str,
8238        source_episode_rowid: Option<i64>,
8239    ) {
8240        conn.execute(
8241            "INSERT INTO triples
8242                 (triple_id, subject_id, predicate, object_id, object_kind,
8243                  valid_from_ms, valid_to_ms, confidence, provenance_json,
8244                  status, created_at_ms, updated_at_ms, source_episode_id)
8245                 VALUES (?1, ?2, ?3, ?4, 'literal', 0, NULL, 0.9, '{}',
8246                         'active', 0, 0, ?5)",
8247            rusqlite::params![triple_id, subject, predicate, object, source_episode_rowid],
8248        )
8249        .expect("seed triple");
8250    }
8251
8252    fn seed_contradiction_row(conn: &rusqlite::Connection, a_id: &str, b_id: &str, kind: &str) {
8253        conn.execute(
8254            "INSERT INTO contradictions
8255                 (a_memory_id, b_memory_id, kind, explanation, detected_at_ms,
8256                  status, resolved_at_ms, resolution_note, winning_triple_id)
8257                 VALUES (?1, ?2, ?3, 'test contradiction', 0,
8258                         'unresolved', NULL, NULL, NULL)",
8259            rusqlite::params![a_id, b_id, kind],
8260        )
8261        .expect("seed contradiction");
8262    }
8263
8264    /// Insert a `semantic_abstractions` row (cluster LLM summary). Used
8265    /// by the cluster-inspect test to verify the abstraction concat path.
8266    fn seed_abstraction_row(
8267        conn: &rusqlite::Connection,
8268        abstraction_id: &str,
8269        cluster_id: &str,
8270        content: &str,
8271    ) {
8272        conn.execute(
8273            "INSERT INTO semantic_abstractions
8274                 (abstraction_id, cluster_id, content, provenance_json,
8275                  confidence, created_at_ms)
8276                 VALUES (?1, ?2, ?3, '{}', 0.9, 0)",
8277            rusqlite::params![abstraction_id, cluster_id, content],
8278        )
8279        .expect("seed abstraction");
8280    }
8281
8282    /// Tests use simple ASCII node_ids (UUID-shaped + plain entity strings),
8283    /// so we percent-encode only `:` and a few other delimiters by hand.
8284    fn percent_encode_node_id(node_id: &str) -> String {
8285        let mut out = String::with_capacity(node_id.len());
8286        for c in node_id.chars() {
8287            match c {
8288                ':' => out.push_str("%3A"),
8289                ' ' => out.push_str("%20"),
8290                '&' => out.push_str("%26"),
8291                '+' => out.push_str("%2B"),
8292                '?' => out.push_str("%3F"),
8293                '#' => out.push_str("%23"),
8294                _ => out.push(c),
8295            }
8296        }
8297        out
8298    }
8299
8300    fn graph_uri(node_id: &str, kind: &str) -> String {
8301        let encoded = percent_encode_node_id(node_id);
8302        format!("/v1/graph/expand?node_id={encoded}&kind={kind}")
8303    }
8304
8305    fn graph_uri_with_limit(node_id: &str, kind: &str, limit: u32) -> String {
8306        let encoded = percent_encode_node_id(node_id);
8307        format!("/v1/graph/expand?node_id={encoded}&kind={kind}&limit={limit}")
8308    }
8309
8310    #[test]
8311    fn expand_cluster_member_from_episode_returns_clusters() {
8312        let runtime = rt();
8313        let h = Harness::new(&runtime);
8314        let memory_id = "11111111-1111-7000-8000-000000000001";
8315        {
8316            let conn = h.open_db();
8317            seed_episode(&conn, memory_id, 100, "ep content");
8318            seed_cluster_row(&conn, "cl-a", 200);
8319            seed_cluster_member(&conn, "cl-a", memory_id);
8320        }
8321        let node_id = format!("ep:{memory_id}");
8322        let (status, body) = runtime.block_on(call(
8323            h.router.clone(),
8324            "GET",
8325            &graph_uri(&node_id, "cluster_member"),
8326            None,
8327        ));
8328        assert_eq!(status, StatusCode::OK, "body: {body}");
8329        let nodes = body
8330            .get("nodes")
8331            .and_then(|v| v.as_array())
8332            .expect("nodes array");
8333        let edges = body
8334            .get("edges")
8335            .and_then(|v| v.as_array())
8336            .expect("edges array");
8337        assert_eq!(nodes.len(), 1, "{body}");
8338        assert_eq!(nodes[0]["id"], "cl:cl-a");
8339        assert_eq!(nodes[0]["kind"], "cluster");
8340        assert_eq!(edges.len(), 1);
8341        assert_eq!(edges[0]["source"], node_id);
8342        assert_eq!(edges[0]["target"], "cl:cl-a");
8343        assert_eq!(edges[0]["kind"], "cluster_member");
8344        h.shutdown(&runtime);
8345    }
8346
8347    #[test]
8348    fn expand_cluster_member_from_cluster_returns_episodes() {
8349        let runtime = rt();
8350        let h = Harness::new(&runtime);
8351        {
8352            let conn = h.open_db();
8353            seed_cluster_row(&conn, "cl-multi", 500);
8354            for i in 0..5 {
8355                let mid = format!("2222{i}222-2222-7000-8000-000000000001");
8356                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
8357                seed_cluster_member(&conn, "cl-multi", &mid);
8358            }
8359        }
8360        let (status, body) = runtime.block_on(call(
8361            h.router.clone(),
8362            "GET",
8363            &graph_uri_with_limit("cl:cl-multi", "cluster_member", 3),
8364            None,
8365        ));
8366        assert_eq!(status, StatusCode::OK, "body: {body}");
8367        let nodes = body["nodes"].as_array().unwrap();
8368        let edges = body["edges"].as_array().unwrap();
8369        assert_eq!(nodes.len(), 3, "limit honored: {body}");
8370        assert_eq!(edges.len(), 3);
8371        for n in nodes {
8372            assert_eq!(n["kind"], "episode");
8373        }
8374        h.shutdown(&runtime);
8375    }
8376
8377    #[test]
8378    fn expand_document_chunk_from_document_returns_chunks() {
8379        let runtime = rt();
8380        let h = Harness::new(&runtime);
8381        let doc_id = "33333333-3333-7000-8000-000000000001";
8382        {
8383            let conn = h.open_db();
8384            seed_document_row(&conn, doc_id, "doc A");
8385            // Insert chunks in shuffled order so the ORDER BY chunk_index
8386            // is load-bearing.
8387            seed_chunk_row(&conn, "c2", doc_id, 2, "chunk 2 text");
8388            seed_chunk_row(&conn, "c0", doc_id, 0, "chunk 0 text");
8389            seed_chunk_row(&conn, "c1", doc_id, 1, "chunk 1 text");
8390            seed_chunk_row(&conn, "c3", doc_id, 3, "chunk 3 text");
8391        }
8392        let node_id = format!("doc:{doc_id}");
8393        let (status, body) = runtime.block_on(call(
8394            h.router.clone(),
8395            "GET",
8396            &graph_uri(&node_id, "document_chunk"),
8397            None,
8398        ));
8399        assert_eq!(status, StatusCode::OK, "body: {body}");
8400        let nodes = body["nodes"].as_array().unwrap();
8401        let edges = body["edges"].as_array().unwrap();
8402        assert_eq!(nodes.len(), 4);
8403        assert_eq!(edges.len(), 4);
8404        // Verify in-order chunk_index emission.
8405        assert_eq!(nodes[0]["id"], "chunk:c0");
8406        assert_eq!(nodes[1]["id"], "chunk:c1");
8407        assert_eq!(nodes[2]["id"], "chunk:c2");
8408        assert_eq!(nodes[3]["id"], "chunk:c3");
8409        for e in edges {
8410            assert_eq!(e["kind"], "document_chunk");
8411        }
8412        h.shutdown(&runtime);
8413    }
8414
8415    #[test]
8416    fn expand_document_chunk_from_chunk_returns_parent_document() {
8417        let runtime = rt();
8418        let h = Harness::new(&runtime);
8419        let doc_id = "44444444-4444-7000-8000-000000000001";
8420        {
8421            let conn = h.open_db();
8422            seed_document_row(&conn, doc_id, "parent doc");
8423            seed_chunk_row(&conn, "c-orphan", doc_id, 0, "chunk content");
8424        }
8425        let (status, body) = runtime.block_on(call(
8426            h.router.clone(),
8427            "GET",
8428            &graph_uri("chunk:c-orphan", "document_chunk"),
8429            None,
8430        ));
8431        assert_eq!(status, StatusCode::OK, "body: {body}");
8432        let nodes = body["nodes"].as_array().unwrap();
8433        let edges = body["edges"].as_array().unwrap();
8434        assert_eq!(nodes.len(), 1);
8435        assert_eq!(edges.len(), 1);
8436        assert_eq!(nodes[0]["id"], format!("doc:{doc_id}"));
8437        assert_eq!(edges[0]["source"], "chunk:c-orphan");
8438        assert_eq!(edges[0]["target"], format!("doc:{doc_id}"));
8439        h.shutdown(&runtime);
8440    }
8441
8442    #[test]
8443    fn expand_triple_from_episode_returns_entities() {
8444        let runtime = rt();
8445        let h = Harness::new(&runtime);
8446        let memory_id = "55555555-5555-7000-8000-000000000001";
8447        let rowid;
8448        {
8449            let conn = h.open_db();
8450            rowid = seed_episode(&conn, memory_id, 100, "alice works at anthropic");
8451            // Two distinct triples → 4 entity endpoints (Alice, Anthropic, Bob, NYC).
8452            seed_triple_row(&conn, "t1", "Alice", "works_at", "Anthropic", Some(rowid));
8453            seed_triple_row(&conn, "t2", "Bob", "lives_in", "NYC", Some(rowid));
8454        }
8455        let node_id = format!("ep:{memory_id}");
8456        let (status, body) = runtime.block_on(call(
8457            h.router.clone(),
8458            "GET",
8459            &graph_uri(&node_id, "triple"),
8460            None,
8461        ));
8462        assert_eq!(status, StatusCode::OK, "body: {body}");
8463        let nodes = body["nodes"].as_array().unwrap();
8464        let edges = body["edges"].as_array().unwrap();
8465        assert_eq!(nodes.len(), 4, "expected 4 unique entity nodes: {body}");
8466        assert_eq!(edges.len(), 2);
8467        let ids: std::collections::HashSet<String> = nodes
8468            .iter()
8469            .map(|n| n["id"].as_str().unwrap().to_string())
8470            .collect();
8471        for expected in ["ent:Alice", "ent:Anthropic", "ent:Bob", "ent:NYC"] {
8472            assert!(ids.contains(expected), "missing {expected} in {body}");
8473        }
8474        for e in edges {
8475            assert_eq!(e["kind"], "triple");
8476            assert!(e["predicate"].is_string(), "predicate set: {body}");
8477        }
8478        h.shutdown(&runtime);
8479    }
8480
8481    #[test]
8482    fn expand_triple_from_entity_returns_episodes() {
8483        let runtime = rt();
8484        let h = Harness::new(&runtime);
8485        {
8486            let conn = h.open_db();
8487            let r1 = seed_episode(
8488                &conn,
8489                "66666666-6666-7000-8000-000000000001",
8490                100,
8491                "alice ep one",
8492            );
8493            let r2 = seed_episode(
8494                &conn,
8495                "66666666-6666-7000-8000-000000000002",
8496                200,
8497                "alice ep two",
8498            );
8499            let r3 = seed_episode(
8500                &conn,
8501                "66666666-6666-7000-8000-000000000003",
8502                300,
8503                "alice ep three",
8504            );
8505            // 3 triples all mentioning Alice on one side or another.
8506            seed_triple_row(&conn, "t1", "Alice", "p", "Bob", Some(r1));
8507            seed_triple_row(&conn, "t2", "Carol", "p", "Alice", Some(r2));
8508            seed_triple_row(&conn, "t3", "Alice", "q", "Dave", Some(r3));
8509            // One triple with no source — must be skipped by the IS NOT NULL filter.
8510            seed_triple_row(&conn, "t-orphan", "Alice", "p", "Eve", None);
8511        }
8512        let (status, body) = runtime.block_on(call(
8513            h.router.clone(),
8514            "GET",
8515            &graph_uri("ent:Alice", "triple"),
8516            None,
8517        ));
8518        assert_eq!(status, StatusCode::OK, "body: {body}");
8519        let nodes = body["nodes"].as_array().unwrap();
8520        let edges = body["edges"].as_array().unwrap();
8521        assert_eq!(nodes.len(), 3, "expected 3 episodes: {body}");
8522        assert_eq!(edges.len(), 3);
8523        for n in nodes {
8524            assert_eq!(n["kind"], "episode");
8525        }
8526        for e in edges {
8527            assert_eq!(e["source"], "ent:Alice");
8528            assert_eq!(e["kind"], "triple");
8529        }
8530        h.shutdown(&runtime);
8531    }
8532
8533    #[test]
8534    fn expand_semantic_from_episode_returns_similar() {
8535        let runtime = rt();
8536        let h = Harness::new(&runtime);
8537        // Seed three episodes via the writer-actor so they get embedded
8538        // + inserted into HNSW. StubEmbedder is deterministic: identical
8539        // content → identical vector → cos_distance = 0. So we use
8540        // distinct strings, then expand from one of them and assert at
8541        // least one similar peer comes back.
8542        runtime.block_on(async {
8543            let mid1 = post_remember(h.router.clone(), "alpha alpha alpha").await;
8544            let _mid2 = post_remember(h.router.clone(), "beta beta beta").await;
8545            let _mid3 = post_remember(h.router.clone(), "gamma gamma gamma").await;
8546            // Expand from mid1.
8547            let (status, body) = call(
8548                h.router.clone(),
8549                "GET",
8550                &graph_uri_with_limit(&format!("ep:{mid1}"), "semantic", 5),
8551                None,
8552            )
8553            .await;
8554            assert_eq!(status, StatusCode::OK, "body: {body}");
8555            let nodes = body["nodes"].as_array().unwrap();
8556            let edges = body["edges"].as_array().unwrap();
8557            // Must NOT include the source.
8558            for n in nodes {
8559                assert_ne!(
8560                    n["id"].as_str().unwrap(),
8561                    format!("ep:{mid1}"),
8562                    "self must be excluded: {body}"
8563                );
8564            }
8565            // Edges must be tagged semantic with a numeric weight.
8566            for e in edges {
8567                assert_eq!(e["kind"], "semantic");
8568                assert!(e["weight"].is_number(), "weight set: {body}");
8569            }
8570        });
8571        h.shutdown(&runtime);
8572    }
8573
8574    /// Helper: POST /memory and return the new memory_id.
8575    async fn post_remember(router: axum::Router, content: &str) -> String {
8576        let (status, body) = call(
8577            router,
8578            "POST",
8579            "/memory",
8580            Some(json!({ "content": content })),
8581        )
8582        .await;
8583        assert_eq!(status, StatusCode::OK, "post failed: {body}");
8584        body["memory_id"].as_str().unwrap().to_string()
8585    }
8586
8587    #[test]
8588    fn expand_400_on_invalid_kind() {
8589        let runtime = rt();
8590        let h = Harness::new(&runtime);
8591        let (status, _body) = runtime.block_on(call(
8592            h.router.clone(),
8593            "GET",
8594            "/v1/graph/expand?node_id=ep:any&kind=banana",
8595            None,
8596        ));
8597        // axum's Query extractor rejects unknown enum value with 400/422.
8598        assert!(
8599            status == StatusCode::BAD_REQUEST || status == StatusCode::UNPROCESSABLE_ENTITY,
8600            "expected 400/422 for bad kind, got {status}"
8601        );
8602        h.shutdown(&runtime);
8603    }
8604
8605    #[test]
8606    fn expand_400_on_invalid_node_for_kind() {
8607        let runtime = rt();
8608        let h = Harness::new(&runtime);
8609        // kind=semantic from a cluster source → 400.
8610        let (status, body) = runtime.block_on(call(
8611            h.router.clone(),
8612            "GET",
8613            &graph_uri("cl:doesnt-matter", "semantic"),
8614            None,
8615        ));
8616        assert_eq!(status, StatusCode::BAD_REQUEST);
8617        assert!(
8618            body["error"]
8619                .as_str()
8620                .is_some_and(|s| s.contains("semantic only valid for episode")),
8621            "got: {body}"
8622        );
8623        h.shutdown(&runtime);
8624    }
8625
8626    #[test]
8627    fn expand_404_on_missing_node_id() {
8628        let runtime = rt();
8629        let h = Harness::new(&runtime);
8630        let (status, body) = runtime.block_on(call(
8631            h.router.clone(),
8632            "GET",
8633            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
8634            None,
8635        ));
8636        assert_eq!(status, StatusCode::NOT_FOUND, "{body}");
8637        h.shutdown(&runtime);
8638    }
8639
8640    #[test]
8641    fn expand_limit_clamped_at_100() {
8642        let runtime = rt();
8643        let h = Harness::new(&runtime);
8644        // Seed > 100 cluster members so we can see the clamp in action.
8645        {
8646            let conn = h.open_db();
8647            seed_cluster_row(&conn, "cl-huge", 1_000);
8648            for i in 0..150 {
8649                let mid = format!("77777777-7777-7000-8000-{:012}", i);
8650                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
8651                seed_cluster_member(&conn, "cl-huge", &mid);
8652            }
8653        }
8654        let (status, body) = runtime.block_on(call(
8655            h.router.clone(),
8656            "GET",
8657            &graph_uri_with_limit("cl:cl-huge", "cluster_member", 999),
8658            None,
8659        ));
8660        assert_eq!(status, StatusCode::OK, "body: {body}");
8661        let nodes = body["nodes"].as_array().unwrap();
8662        assert_eq!(
8663            nodes.len(),
8664            100,
8665            "limit must be silently clamped to 100, got {}",
8666            nodes.len()
8667        );
8668        h.shutdown(&runtime);
8669    }
8670
8671    #[test]
8672    fn expand_bad_node_id_prefix_returns_400() {
8673        let runtime = rt();
8674        let h = Harness::new(&runtime);
8675        let (status, body) = runtime.block_on(call(
8676            h.router.clone(),
8677            "GET",
8678            "/v1/graph/expand?node_id=garbage&kind=cluster_member",
8679            None,
8680        ));
8681        assert_eq!(status, StatusCode::BAD_REQUEST);
8682        assert!(
8683            body["error"]
8684                .as_str()
8685                .is_some_and(|s| s.contains("node_id must be")),
8686            "got: {body}"
8687        );
8688        h.shutdown(&runtime);
8689    }
8690
8691    #[test]
8692    fn expand_respects_tenant_scoping_via_unknown_tenant_header() {
8693        // Routing via X-Solo-Tenant: a header pointing to an unknown
8694        // tenant must 404 before the handler even runs — the
8695        // TenantExtractor is the gatekeeper, so node ids can't be
8696        // resolved against the wrong tenant's DB.
8697        let runtime = rt();
8698        let h = Harness::new(&runtime);
8699        // Seed a real episode in the default tenant so we know it
8700        // exists there. If tenant scoping leaked, this lookup would 200
8701        // even with the wrong tenant header.
8702        let memory_id = "88888888-8888-7000-8000-000000000001";
8703        {
8704            let conn = h.open_db();
8705            seed_episode(&conn, memory_id, 100, "scoped");
8706            seed_cluster_row(&conn, "cl-scoped", 200);
8707            seed_cluster_member(&conn, "cl-scoped", memory_id);
8708        }
8709        let node_id = format!("ep:{memory_id}");
8710        let r = h.router.clone();
8711        let (status, _body) = runtime.block_on(async {
8712            let req = Request::builder()
8713                .method("GET")
8714                .uri(graph_uri(&node_id, "cluster_member"))
8715                .header("x-solo-tenant", "never-registered-tenant")
8716                .body(Body::empty())
8717                .unwrap();
8718            let resp = r.oneshot(req).await.expect("oneshot");
8719            let s = resp.status();
8720            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8721            (s, _b)
8722        });
8723        // Unknown tenant id → 404 from the registry. Confirms cross-tenant
8724        // lookups can't smuggle through this endpoint.
8725        assert_eq!(status, StatusCode::NOT_FOUND);
8726        h.shutdown(&runtime);
8727    }
8728
8729    #[test]
8730    fn expand_respects_auth_when_enabled() {
8731        let runtime = rt();
8732        let h = Harness::new_with_auth(&runtime, Some("graph-secret".into()));
8733        // No Authorization header → 401.
8734        let (status, _) = runtime.block_on(call(
8735            h.router.clone(),
8736            "GET",
8737            &graph_uri("ep:any", "cluster_member"),
8738            None,
8739        ));
8740        assert_eq!(status, StatusCode::UNAUTHORIZED);
8741        // Right token → handler runs (404 for unknown node, NOT 401).
8742        let (status, _) = runtime.block_on(call_with_auth(
8743            h.router.clone(),
8744            "GET",
8745            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
8746            None,
8747            Some("Bearer graph-secret"),
8748        ));
8749        assert_eq!(status, StatusCode::NOT_FOUND);
8750        h.shutdown(&runtime);
8751    }
8752
8753    #[test]
8754    fn expand_works_when_auth_none() {
8755        let runtime = rt();
8756        let h = Harness::new(&runtime);
8757        // Unauthenticated request hits the handler; 404 for unknown node
8758        // proves the auth-none path doesn't reject the request.
8759        let (status, _) = runtime.block_on(call(
8760            h.router.clone(),
8761            "GET",
8762            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
8763            None,
8764        ));
8765        assert_eq!(status, StatusCode::NOT_FOUND);
8766        h.shutdown(&runtime);
8767    }
8768
8769    // ---------------------------------------------------------------------
8770    // v0.10.0: GET /v1/graph/nodes + GET /v1/graph/edges
8771    //
8772    // Paginated catalog reads. Both endpoints share auth + tenant +
8773    // cursor scaffolding from /v1/graph/expand, so tests focus on the
8774    // new surface: filter parsing, entity synthesis cap, cursor round-
8775    // trip, edge-type defaults (semantic excluded), and the semantic
8776    // 400 redirect to /v1/graph/neighbors.
8777    // ---------------------------------------------------------------------
8778
8779    /// Lower-level helper that captures response headers in addition to
8780    /// status + JSON body. Used by the entity-cap header test.
8781    async fn call_with_headers(
8782        router: axum::Router,
8783        method: &str,
8784        uri: &str,
8785    ) -> (StatusCode, axum::http::HeaderMap, Value) {
8786        let req = Request::builder()
8787            .method(method)
8788            .uri(uri)
8789            .header("content-length", "0")
8790            .body(Body::empty())
8791            .unwrap();
8792        let resp = router.oneshot(req).await.expect("oneshot");
8793        let status = resp.status();
8794        let headers = resp.headers().clone();
8795        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
8796        let v: Value = if body_bytes.is_empty() {
8797            Value::Null
8798        } else {
8799            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
8800        };
8801        (status, headers, v)
8802    }
8803
8804    #[test]
8805    fn nodes_returns_all_kinds_when_no_filter() {
8806        let runtime = rt();
8807        let h = Harness::new(&runtime);
8808        {
8809            let conn = h.open_db();
8810            let rowid = seed_episode(
8811                &conn,
8812                "aaaaaaaa-0000-7000-8000-000000000001",
8813                100,
8814                "episode one",
8815            );
8816            seed_document_row(&conn, "doc-1", "doc one");
8817            seed_chunk_row(&conn, "chunk-1", "doc-1", 0, "chunk one body");
8818            seed_cluster_row(&conn, "cl-one", 200);
8819            seed_triple_row(&conn, "t-one", "Alice", "knows", "Bob", Some(rowid));
8820        }
8821        let (status, body) =
8822            runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/nodes", None));
8823        assert_eq!(status, StatusCode::OK, "body: {body}");
8824        let nodes = body["nodes"].as_array().unwrap();
8825        let kinds: std::collections::HashSet<&str> =
8826            nodes.iter().map(|n| n["kind"].as_str().unwrap()).collect();
8827        for expected in ["episode", "document", "chunk", "cluster", "entity"] {
8828            assert!(
8829                kinds.contains(expected),
8830                "expected {expected} kind in response: {body}"
8831            );
8832        }
8833        h.shutdown(&runtime);
8834    }
8835
8836    #[test]
8837    fn nodes_filter_by_single_kind() {
8838        let runtime = rt();
8839        let h = Harness::new(&runtime);
8840        {
8841            let conn = h.open_db();
8842            seed_episode(&conn, "bbbbbbbb-0000-7000-8000-000000000001", 100, "ep");
8843            seed_document_row(&conn, "doc-only", "d");
8844            seed_cluster_row(&conn, "cl-only", 300);
8845        }
8846        let (status, body) = runtime.block_on(call(
8847            h.router.clone(),
8848            "GET",
8849            "/v1/graph/nodes?kind=episode",
8850            None,
8851        ));
8852        assert_eq!(status, StatusCode::OK, "body: {body}");
8853        let nodes = body["nodes"].as_array().unwrap();
8854        assert!(!nodes.is_empty(), "{body}");
8855        for n in nodes {
8856            assert_eq!(
8857                n["kind"], "episode",
8858                "kind filter must be exclusive: {body}"
8859            );
8860        }
8861        h.shutdown(&runtime);
8862    }
8863
8864    #[test]
8865    fn nodes_filter_by_multiple_kinds() {
8866        let runtime = rt();
8867        let h = Harness::new(&runtime);
8868        {
8869            let conn = h.open_db();
8870            seed_episode(&conn, "cccccccc-0000-7000-8000-000000000001", 100, "ep");
8871            seed_document_row(&conn, "doc-multi", "d");
8872            seed_cluster_row(&conn, "cl-multi", 300);
8873        }
8874        let (status, body) = runtime.block_on(call(
8875            h.router.clone(),
8876            "GET",
8877            "/v1/graph/nodes?kind=episode,document",
8878            None,
8879        ));
8880        assert_eq!(status, StatusCode::OK, "body: {body}");
8881        let nodes = body["nodes"].as_array().unwrap();
8882        let kinds: std::collections::HashSet<&str> =
8883            nodes.iter().map(|n| n["kind"].as_str().unwrap()).collect();
8884        assert!(kinds.contains("episode"), "{body}");
8885        assert!(kinds.contains("document"), "{body}");
8886        assert!(
8887            !kinds.contains("cluster"),
8888            "cluster must be filtered out: {body}"
8889        );
8890        h.shutdown(&runtime);
8891    }
8892
8893    #[test]
8894    fn nodes_entity_synthesis_caps_at_200() {
8895        let runtime = rt();
8896        let h = Harness::new(&runtime);
8897        {
8898            let conn = h.open_db();
8899            // Seed one episode + 250 distinct triple object values so the
8900            // entity rollup surfaces >200 entities. ref_count is 1 for
8901            // each; pick subject = "Alice" for all so the entity count
8902            // collapses on subject (1 "Alice") + 250 distinct objects.
8903            let rowid = seed_episode(&conn, "dddddddd-0000-7000-8000-000000000001", 100, "ep");
8904            for i in 0..250 {
8905                let triple_id = format!("t-cap-{i:03}");
8906                let obj = format!("Entity{i:03}");
8907                seed_triple_row(&conn, &triple_id, "Alice", "knows", &obj, Some(rowid));
8908            }
8909        }
8910        let (status, headers, body) = runtime.block_on(call_with_headers(
8911            h.router.clone(),
8912            "GET",
8913            "/v1/graph/nodes?kind=entity&limit=500",
8914        ));
8915        assert_eq!(status, StatusCode::OK, "body: {body}");
8916        let nodes = body["nodes"].as_array().unwrap();
8917        assert_eq!(
8918            nodes.len(),
8919            200,
8920            "entity cap must be enforced at 200, got {}",
8921            nodes.len()
8922        );
8923        assert_eq!(
8924            headers
8925                .get("x-solo-entity-cap-reached")
8926                .and_then(|v| v.to_str().ok()),
8927            Some("true"),
8928            "cap-reached header missing: headers={headers:?}"
8929        );
8930        for n in nodes {
8931            assert_eq!(n["kind"], "entity");
8932        }
8933        h.shutdown(&runtime);
8934    }
8935
8936    #[test]
8937    fn nodes_since_until_filter_works() {
8938        let runtime = rt();
8939        let h = Harness::new(&runtime);
8940        {
8941            let conn = h.open_db();
8942            seed_episode(&conn, "eeeeeeee-0000-7000-8000-000000000001", 100, "early");
8943            seed_episode(&conn, "eeeeeeee-0000-7000-8000-000000000002", 500, "middle");
8944            seed_episode(&conn, "eeeeeeee-0000-7000-8000-000000000003", 1000, "late");
8945        }
8946        let (status, body) = runtime.block_on(call(
8947            h.router.clone(),
8948            "GET",
8949            "/v1/graph/nodes?kind=episode&since_ms=400&until_ms=600",
8950            None,
8951        ));
8952        assert_eq!(status, StatusCode::OK, "body: {body}");
8953        let nodes = body["nodes"].as_array().unwrap();
8954        assert_eq!(nodes.len(), 1, "{body}");
8955        assert_eq!(nodes[0]["id"], "ep:eeeeeeee-0000-7000-8000-000000000002");
8956        h.shutdown(&runtime);
8957    }
8958
8959    #[test]
8960    fn nodes_pagination_round_trip() {
8961        let runtime = rt();
8962        let h = Harness::new(&runtime);
8963        {
8964            let conn = h.open_db();
8965            for i in 0..150 {
8966                let mid = format!("f0000000-0000-7000-8000-{i:012}");
8967                // ts_ms scales with i so the sort order is deterministic;
8968                // newest (highest i) appears first.
8969                seed_episode(&conn, &mid, 1_000 + i as i64, "page");
8970            }
8971        }
8972        let limit = 50u32;
8973        let mut seen: std::collections::HashSet<String> = Default::default();
8974        let mut next_cursor: Option<String> = None;
8975        for page_idx in 0..4 {
8976            let cursor_param = next_cursor
8977                .as_deref()
8978                .map(|c| format!("&cursor={c}"))
8979                .unwrap_or_default();
8980            let uri = format!("/v1/graph/nodes?kind=episode&limit={limit}{cursor_param}");
8981            let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
8982            assert_eq!(status, StatusCode::OK, "page {page_idx}: {body}");
8983            let nodes = body["nodes"].as_array().unwrap();
8984            assert!(
8985                nodes.len() <= limit as usize,
8986                "page {page_idx} over-fetched: {body}"
8987            );
8988            for n in nodes {
8989                let id = n["id"].as_str().unwrap().to_string();
8990                assert!(seen.insert(id.clone()), "duplicate id across pages: {id}");
8991            }
8992            next_cursor = body
8993                .get("next_cursor")
8994                .and_then(|v| v.as_str())
8995                .map(|s| s.to_string());
8996            if next_cursor.is_none() {
8997                break;
8998            }
8999        }
9000        assert_eq!(
9001            seen.len(),
9002            150,
9003            "expected 150 distinct ids across pages, got {}",
9004            seen.len()
9005        );
9006        assert!(
9007            next_cursor.is_none(),
9008            "cursor should be null after last page; got {next_cursor:?}"
9009        );
9010        h.shutdown(&runtime);
9011    }
9012
9013    #[test]
9014    fn nodes_respects_tenant_scoping() {
9015        let runtime = rt();
9016        let h = Harness::new(&runtime);
9017        {
9018            let conn = h.open_db();
9019            seed_episode(
9020                &conn,
9021                "11110000-0000-7000-8000-000000000001",
9022                100,
9023                "tenant scope",
9024            );
9025        }
9026        // Request against a never-registered tenant header → 404 from
9027        // the tenant extractor before the handler runs.
9028        let r = h.router.clone();
9029        let (status, _body) = runtime.block_on(async {
9030            let req = Request::builder()
9031                .method("GET")
9032                .uri("/v1/graph/nodes")
9033                .header("x-solo-tenant", "never-registered-tenant")
9034                .body(Body::empty())
9035                .unwrap();
9036            let resp = r.oneshot(req).await.expect("oneshot");
9037            let s = resp.status();
9038            let _b = resp.into_body().collect().await.unwrap().to_bytes();
9039            (s, _b)
9040        });
9041        assert_eq!(status, StatusCode::NOT_FOUND);
9042        h.shutdown(&runtime);
9043    }
9044
9045    #[test]
9046    fn nodes_respects_auth_when_enabled() {
9047        let runtime = rt();
9048        let h = Harness::new_with_auth(&runtime, Some("nodes-secret".into()));
9049        let (status, _) = runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/nodes", None));
9050        assert_eq!(
9051            status,
9052            StatusCode::UNAUTHORIZED,
9053            "must reject unauthenticated request"
9054        );
9055        let (status, _) = runtime.block_on(call_with_auth(
9056            h.router.clone(),
9057            "GET",
9058            "/v1/graph/nodes",
9059            None,
9060            Some("Bearer nodes-secret"),
9061        ));
9062        assert_eq!(status, StatusCode::OK, "must pass through with bearer");
9063        h.shutdown(&runtime);
9064    }
9065
9066    #[test]
9067    fn nodes_works_with_auth_none() {
9068        let runtime = rt();
9069        let h = Harness::new(&runtime);
9070        let (status, body) =
9071            runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/nodes", None));
9072        assert_eq!(status, StatusCode::OK, "{body}");
9073        assert!(body.get("nodes").is_some());
9074        h.shutdown(&runtime);
9075    }
9076
9077    // --- /v1/graph/edges ---
9078
9079    #[test]
9080    fn edges_returns_all_default_kinds() {
9081        let runtime = rt();
9082        let h = Harness::new(&runtime);
9083        {
9084            let conn = h.open_db();
9085            let rowid = seed_episode(&conn, "22220000-0000-7000-8000-000000000001", 100, "ep src");
9086            seed_triple_row(&conn, "t-def", "Alice", "knows", "Bob", Some(rowid));
9087            seed_document_row(&conn, "doc-e", "doc");
9088            seed_chunk_row(&conn, "c-e", "doc-e", 0, "chunk");
9089            seed_cluster_row(&conn, "cl-e", 200);
9090            seed_cluster_member(&conn, "cl-e", "22220000-0000-7000-8000-000000000001");
9091        }
9092        let (status, body) =
9093            runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/edges", None));
9094        assert_eq!(status, StatusCode::OK, "body: {body}");
9095        let edges = body["edges"].as_array().unwrap();
9096        let kinds: std::collections::HashSet<&str> =
9097            edges.iter().map(|e| e["kind"].as_str().unwrap()).collect();
9098        assert!(kinds.contains("triple"), "{body}");
9099        assert!(kinds.contains("document_chunk"), "{body}");
9100        assert!(kinds.contains("cluster_member"), "{body}");
9101        assert!(
9102            !kinds.contains("semantic"),
9103            "semantic is NOT in default response: {body}"
9104        );
9105        h.shutdown(&runtime);
9106    }
9107
9108    #[test]
9109    fn edges_filter_by_node_id_finds_incident_edges() {
9110        let runtime = rt();
9111        let h = Harness::new(&runtime);
9112        let memory_id = "33330000-0000-7000-8000-000000000001";
9113        {
9114            let conn = h.open_db();
9115            let rowid = seed_episode(&conn, memory_id, 100, "ep multi-triple");
9116            seed_triple_row(&conn, "t-a", "Alice", "p", "Bob", Some(rowid));
9117            seed_triple_row(&conn, "t-b", "Alice", "p", "Carol", Some(rowid));
9118            seed_triple_row(&conn, "t-c", "Alice", "p", "Dave", Some(rowid));
9119            // Decoy episode with its own triple — must NOT come back.
9120            let decoy_rowid =
9121                seed_episode(&conn, "33330000-0000-7000-8000-000000000999", 200, "decoy");
9122            seed_triple_row(&conn, "t-decoy", "Alice", "p", "Eve", Some(decoy_rowid));
9123        }
9124        let uri = format!(
9125            "/v1/graph/edges?type=triple&node_id={}",
9126            percent_encode_node_id(&format!("ep:{memory_id}"))
9127        );
9128        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
9129        assert_eq!(status, StatusCode::OK, "body: {body}");
9130        let edges = body["edges"].as_array().unwrap();
9131        assert_eq!(edges.len(), 3, "expected 3 incident edges: {body}");
9132        for e in edges {
9133            assert_eq!(e["source"], format!("ep:{memory_id}"));
9134            assert_eq!(e["kind"], "triple");
9135        }
9136        h.shutdown(&runtime);
9137    }
9138
9139    #[test]
9140    fn edges_filter_by_type_works() {
9141        let runtime = rt();
9142        let h = Harness::new(&runtime);
9143        {
9144            let conn = h.open_db();
9145            let rowid = seed_episode(&conn, "44440000-0000-7000-8000-000000000001", 100, "ep");
9146            seed_triple_row(&conn, "t-only", "Alice", "p", "Bob", Some(rowid));
9147            seed_document_row(&conn, "doc-skip", "doc");
9148            seed_chunk_row(&conn, "c-skip", "doc-skip", 0, "chunk");
9149        }
9150        let (status, body) = runtime.block_on(call(
9151            h.router.clone(),
9152            "GET",
9153            "/v1/graph/edges?type=triple",
9154            None,
9155        ));
9156        assert_eq!(status, StatusCode::OK, "{body}");
9157        let edges = body["edges"].as_array().unwrap();
9158        assert!(!edges.is_empty(), "{body}");
9159        for e in edges {
9160            assert_eq!(e["kind"], "triple", "{body}");
9161        }
9162        h.shutdown(&runtime);
9163    }
9164
9165    #[test]
9166    fn edges_rejects_semantic_type_with_400() {
9167        let runtime = rt();
9168        let h = Harness::new(&runtime);
9169        let (status, body) = runtime.block_on(call(
9170            h.router.clone(),
9171            "GET",
9172            "/v1/graph/edges?type=semantic",
9173            None,
9174        ));
9175        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
9176        let err = body["error"].as_str().unwrap_or_default();
9177        assert!(
9178            err.contains("/v1/graph/neighbors"),
9179            "error must point to /v1/graph/neighbors: {body}"
9180        );
9181        h.shutdown(&runtime);
9182    }
9183
9184    #[test]
9185    fn edges_pagination_round_trip() {
9186        let runtime = rt();
9187        let h = Harness::new(&runtime);
9188        {
9189            let conn = h.open_db();
9190            let rowid = seed_episode(&conn, "55550000-0000-7000-8000-000000000001", 100, "ep big");
9191            // 60 triples → 60 triple edges. limit=25 → 3 pages.
9192            for i in 0..60 {
9193                let tid = format!("t-page-{i:03}");
9194                let obj = format!("Obj{i:03}");
9195                seed_triple_row(&conn, &tid, "Alice", "p", &obj, Some(rowid));
9196            }
9197        }
9198        let limit = 25u32;
9199        let mut seen: std::collections::HashSet<String> = Default::default();
9200        let mut next_cursor: Option<String> = None;
9201        for page_idx in 0..5 {
9202            let cursor_param = next_cursor
9203                .as_deref()
9204                .map(|c| format!("&cursor={c}"))
9205                .unwrap_or_default();
9206            let uri = format!("/v1/graph/edges?type=triple&limit={limit}{cursor_param}");
9207            let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
9208            assert_eq!(status, StatusCode::OK, "page {page_idx}: {body}");
9209            let edges = body["edges"].as_array().unwrap();
9210            for e in edges {
9211                let id = e["id"].as_str().unwrap().to_string();
9212                assert!(seen.insert(id.clone()), "duplicate edge id: {id}");
9213            }
9214            next_cursor = body
9215                .get("next_cursor")
9216                .and_then(|v| v.as_str())
9217                .map(|s| s.to_string());
9218            if next_cursor.is_none() {
9219                break;
9220            }
9221        }
9222        assert_eq!(
9223            seen.len(),
9224            60,
9225            "expected 60 distinct edges, got {}",
9226            seen.len()
9227        );
9228        assert!(next_cursor.is_none(), "expected exhausted cursor");
9229        h.shutdown(&runtime);
9230    }
9231
9232    #[test]
9233    fn edges_respects_tenant_scoping() {
9234        let runtime = rt();
9235        let h = Harness::new(&runtime);
9236        {
9237            let conn = h.open_db();
9238            let rowid = seed_episode(&conn, "66660000-0000-7000-8000-000000000001", 100, "ep");
9239            seed_triple_row(&conn, "t-tenant", "Alice", "p", "Bob", Some(rowid));
9240        }
9241        let r = h.router.clone();
9242        let (status, _) = runtime.block_on(async {
9243            let req = Request::builder()
9244                .method("GET")
9245                .uri("/v1/graph/edges")
9246                .header("x-solo-tenant", "never-registered-tenant")
9247                .body(Body::empty())
9248                .unwrap();
9249            let resp = r.oneshot(req).await.expect("oneshot");
9250            let s = resp.status();
9251            let _b = resp.into_body().collect().await.unwrap().to_bytes();
9252            (s, _b)
9253        });
9254        assert_eq!(status, StatusCode::NOT_FOUND);
9255        h.shutdown(&runtime);
9256    }
9257
9258    #[test]
9259    fn edges_respects_auth_when_enabled() {
9260        let runtime = rt();
9261        let h = Harness::new_with_auth(&runtime, Some("edges-secret".into()));
9262        let (status, _) = runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/edges", None));
9263        assert_eq!(status, StatusCode::UNAUTHORIZED);
9264        let (status, _) = runtime.block_on(call_with_auth(
9265            h.router.clone(),
9266            "GET",
9267            "/v1/graph/edges",
9268            None,
9269            Some("Bearer edges-secret"),
9270        ));
9271        assert_eq!(status, StatusCode::OK);
9272        h.shutdown(&runtime);
9273    }
9274
9275    // ---------------------------------------------------------------------
9276    // v0.10.0: GET /v1/graph/inspect/{id}
9277    //
9278    // Kind-discriminated full-record drill. Shares auth + tenant + node-id
9279    // prefix scaffolding with /v1/graph/expand and /v1/graph/{nodes,edges},
9280    // so tests focus on the new surface: per-kind full_text source +
9281    // triples_in/out shape + entity zero-triple 404 semantics + the
9282    // standard 400/404/auth/tenant cases.
9283    // ---------------------------------------------------------------------
9284
9285    fn inspect_uri(node_id: &str) -> String {
9286        // Path parameter must be percent-encoded (`:` is `%3A` after
9287        // the URI parser splits segments). axum's Path<String>
9288        // extractor percent-decodes automatically.
9289        format!("/v1/graph/inspect/{}", percent_encode_node_id(node_id))
9290    }
9291
9292    #[test]
9293    fn inspect_episode_returns_full_text_plus_triples_out() {
9294        let runtime = rt();
9295        let h = Harness::new(&runtime);
9296        let memory_id = "a1110000-0000-7000-8000-000000000001";
9297        let full_text = "Met Alice for coffee at the new place. She mentioned the project is on track but they're hitting issues with the deploy pipeline.";
9298        {
9299            let conn = h.open_db();
9300            let rowid = seed_episode(&conn, memory_id, 1_715_625_600_000, full_text);
9301            seed_triple_row(&conn, "t-ep-1", "user", "met_with", "Alice", Some(rowid));
9302            seed_triple_row(
9303                &conn,
9304                "t-ep-2",
9305                "user",
9306                "discussed",
9307                "deploy_pipeline",
9308                Some(rowid),
9309            );
9310            seed_triple_row(&conn, "t-ep-3", "Alice", "works_on", "project", Some(rowid));
9311        }
9312        let (status, body) = runtime.block_on(call(
9313            h.router.clone(),
9314            "GET",
9315            &inspect_uri(&format!("ep:{memory_id}")),
9316            None,
9317        ));
9318        assert_eq!(status, StatusCode::OK, "body: {body}");
9319        assert_eq!(body["node"]["kind"], "episode");
9320        assert_eq!(body["node"]["id"], format!("ep:{memory_id}"));
9321        assert_eq!(
9322            body["full_text"].as_str().unwrap(),
9323            full_text,
9324            "full_text must match episodes.content verbatim, untruncated"
9325        );
9326        let triples_out = body["triples_out"].as_array().unwrap();
9327        assert_eq!(triples_out.len(), 3, "{body}");
9328        let triples_in = body["triples_in"].as_array().unwrap();
9329        assert!(triples_in.is_empty(), "episodes have no triples_in: {body}");
9330        for e in triples_out {
9331            assert_eq!(e["kind"], "triple");
9332            assert_eq!(e["source"], format!("ep:{memory_id}"));
9333            assert!(e["target"].as_str().unwrap().starts_with("ent:"));
9334            assert!(e["predicate"].as_str().is_some());
9335            assert!(e["weight"].as_f64().is_some());
9336        }
9337        h.shutdown(&runtime);
9338    }
9339
9340    #[test]
9341    fn inspect_episode_triples_in_is_empty_for_v10p1() {
9342        // Seed an episode + a triple from a DIFFERENT episode that
9343        // happens to mention the focal episode's content. Even with
9344        // entities referencing the episode topic, episode.triples_in
9345        // is structurally empty in v0.10.0 P1.
9346        let runtime = rt();
9347        let h = Harness::new(&runtime);
9348        let focal = "a2220000-0000-7000-8000-000000000001";
9349        let other = "a2220000-0000-7000-8000-000000000002";
9350        {
9351            let conn = h.open_db();
9352            seed_episode(&conn, focal, 100, "focal episode body");
9353            let other_rowid = seed_episode(&conn, other, 200, "another episode");
9354            // Entity "user" gets referenced heavily; doesn't matter --
9355            // episode triples_in stays empty.
9356            for i in 0..5 {
9357                let tid = format!("t-other-{i}");
9358                seed_triple_row(&conn, &tid, "user", "did", "thing", Some(other_rowid));
9359            }
9360        }
9361        let (status, body) = runtime.block_on(call(
9362            h.router.clone(),
9363            "GET",
9364            &inspect_uri(&format!("ep:{focal}")),
9365            None,
9366        ));
9367        assert_eq!(status, StatusCode::OK, "body: {body}");
9368        let triples_in = body["triples_in"].as_array().unwrap();
9369        assert!(
9370            triples_in.is_empty(),
9371            "episode triples_in must be empty regardless of cross-episode entity references: {body}"
9372        );
9373        h.shutdown(&runtime);
9374    }
9375
9376    #[test]
9377    fn inspect_document_returns_full_text_concatenated_from_chunks() {
9378        let runtime = rt();
9379        let h = Harness::new(&runtime);
9380        let doc_id = "d3330000-0000-7000-8000-000000000001";
9381        {
9382            let conn = h.open_db();
9383            seed_document_row(&conn, doc_id, "doc-title");
9384            seed_chunk_row(&conn, "ch-doc-1", doc_id, 0, "First chunk body.");
9385            seed_chunk_row(&conn, "ch-doc-2", doc_id, 1, "Second chunk body.");
9386            seed_chunk_row(&conn, "ch-doc-3", doc_id, 2, "Third chunk body.");
9387        }
9388        let (status, body) = runtime.block_on(call(
9389            h.router.clone(),
9390            "GET",
9391            &inspect_uri(&format!("doc:{doc_id}")),
9392            None,
9393        ));
9394        assert_eq!(status, StatusCode::OK, "body: {body}");
9395        assert_eq!(body["node"]["kind"], "document");
9396        let full_text = body["full_text"].as_str().unwrap();
9397        // Concatenation order matches chunk_index ASC; separator is "\n\n".
9398        assert_eq!(
9399            full_text,
9400            "First chunk body.\n\nSecond chunk body.\n\nThird chunk body."
9401        );
9402        assert!(body["triples_in"].as_array().unwrap().is_empty());
9403        assert!(body["triples_out"].as_array().unwrap().is_empty());
9404        h.shutdown(&runtime);
9405    }
9406
9407    #[test]
9408    fn inspect_chunk_returns_text() {
9409        let runtime = rt();
9410        let h = Harness::new(&runtime);
9411        let chunk_body = "This is the body of the chunk being inspected.";
9412        {
9413            let conn = h.open_db();
9414            seed_document_row(&conn, "doc-chunk-host", "host");
9415            seed_chunk_row(
9416                &conn,
9417                "chunk-inspect-target",
9418                "doc-chunk-host",
9419                0,
9420                chunk_body,
9421            );
9422        }
9423        let (status, body) = runtime.block_on(call(
9424            h.router.clone(),
9425            "GET",
9426            &inspect_uri("chunk:chunk-inspect-target"),
9427            None,
9428        ));
9429        assert_eq!(status, StatusCode::OK, "body: {body}");
9430        assert_eq!(body["node"]["kind"], "chunk");
9431        assert_eq!(body["full_text"].as_str().unwrap(), chunk_body);
9432        assert!(body["triples_in"].as_array().unwrap().is_empty());
9433        assert!(body["triples_out"].as_array().unwrap().is_empty());
9434        h.shutdown(&runtime);
9435    }
9436
9437    #[test]
9438    fn inspect_cluster_returns_label_and_abstraction() {
9439        let runtime = rt();
9440        let h = Harness::new(&runtime);
9441        let cluster_id = "cl-inspect-target";
9442        let abstraction_text = "Discussions about the deploy pipeline and on-call rotation.";
9443        {
9444            let conn = h.open_db();
9445            seed_cluster_row(&conn, cluster_id, 12345);
9446            seed_abstraction_row(&conn, "abs-1", cluster_id, abstraction_text);
9447        }
9448        let (status, body) = runtime.block_on(call(
9449            h.router.clone(),
9450            "GET",
9451            &inspect_uri(&format!("cl:{cluster_id}")),
9452            None,
9453        ));
9454        assert_eq!(status, StatusCode::OK, "body: {body}");
9455        assert_eq!(body["node"]["kind"], "cluster");
9456        let full_text = body["full_text"].as_str().unwrap();
9457        assert!(
9458            full_text.contains(cluster_id),
9459            "full_text must include cluster label: {full_text}"
9460        );
9461        assert!(
9462            full_text.contains(abstraction_text),
9463            "full_text must include abstraction text: {full_text}"
9464        );
9465        // "label\n\nabstraction" -- separated by blank line for the
9466        // inspector renderer.
9467        assert!(
9468            full_text.contains("\n\n"),
9469            "label and abstraction must be separated: {full_text}"
9470        );
9471        h.shutdown(&runtime);
9472    }
9473
9474    #[test]
9475    fn inspect_entity_returns_triples_only() {
9476        let runtime = rt();
9477        let h = Harness::new(&runtime);
9478        {
9479            let conn = h.open_db();
9480            let rowid = seed_episode(
9481                &conn,
9482                "e5550000-0000-7000-8000-000000000001",
9483                100,
9484                "host episode",
9485            );
9486            // 5 triples that reference Alice (as subject or object).
9487            seed_triple_row(&conn, "t-ent-1", "Alice", "knows", "Bob", Some(rowid));
9488            seed_triple_row(
9489                &conn,
9490                "t-ent-2",
9491                "Alice",
9492                "works_at",
9493                "Anthropic",
9494                Some(rowid),
9495            );
9496            seed_triple_row(&conn, "t-ent-3", "user", "met", "Alice", Some(rowid));
9497            seed_triple_row(&conn, "t-ent-4", "Alice", "owns", "laptop", Some(rowid));
9498            seed_triple_row(&conn, "t-ent-5", "Carol", "mentors", "Alice", Some(rowid));
9499        }
9500        let (status, body) = runtime.block_on(call(
9501            h.router.clone(),
9502            "GET",
9503            &inspect_uri("ent:Alice"),
9504            None,
9505        ));
9506        assert_eq!(status, StatusCode::OK, "body: {body}");
9507        assert_eq!(body["node"]["kind"], "entity");
9508        assert_eq!(body["node"]["id"], "ent:Alice");
9509        assert!(
9510            body["full_text"].is_null(),
9511            "entity full_text must be null (entities have no body): {body}"
9512        );
9513        let triples_out = body["triples_out"].as_array().unwrap();
9514        assert_eq!(triples_out.len(), 5, "{body}");
9515        assert!(body["triples_in"].as_array().unwrap().is_empty());
9516        for e in triples_out {
9517            assert_eq!(e["kind"], "triple");
9518            assert_eq!(e["source"], "ent:Alice");
9519            // Counterpart is always an entity; Alice never appears on
9520            // both ends so target != source.
9521            assert!(e["target"].as_str().unwrap().starts_with("ent:"));
9522            assert_ne!(e["target"], "ent:Alice");
9523        }
9524        h.shutdown(&runtime);
9525    }
9526
9527    #[test]
9528    fn inspect_entity_with_zero_triples_returns_404() {
9529        let runtime = rt();
9530        let h = Harness::new(&runtime);
9531        // Seed unrelated triples so the table isn't empty; the target
9532        // entity still has zero references.
9533        {
9534            let conn = h.open_db();
9535            let rowid = seed_episode(&conn, "e6660000-0000-7000-8000-000000000001", 100, "ep");
9536            seed_triple_row(&conn, "t-other", "Bob", "knows", "Carol", Some(rowid));
9537        }
9538        let (status, body) = runtime.block_on(call(
9539            h.router.clone(),
9540            "GET",
9541            &inspect_uri("ent:Nonexistent"),
9542            None,
9543        ));
9544        assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
9545        let err = body["error"].as_str().unwrap_or_default();
9546        assert!(
9547            err.contains("Nonexistent") || err.contains("entity"),
9548            "error must mention entity: {body}"
9549        );
9550        h.shutdown(&runtime);
9551    }
9552
9553    #[test]
9554    fn inspect_404_on_missing_node() {
9555        // Well-formed `ep:` prefix + valid UUID shape, but no row in DB.
9556        let runtime = rt();
9557        let h = Harness::new(&runtime);
9558        let (status, body) = runtime.block_on(call(
9559            h.router.clone(),
9560            "GET",
9561            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
9562            None,
9563        ));
9564        assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
9565        h.shutdown(&runtime);
9566    }
9567
9568    #[test]
9569    fn inspect_400_on_invalid_prefix() {
9570        let runtime = rt();
9571        let h = Harness::new(&runtime);
9572        let (status, body) =
9573            runtime.block_on(call(h.router.clone(), "GET", &inspect_uri("xyz:foo"), None));
9574        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
9575        let err = body["error"].as_str().unwrap_or_default();
9576        assert!(
9577            err.contains("xyz") || err.contains("prefix"),
9578            "error must mention bad prefix: {body}"
9579        );
9580        h.shutdown(&runtime);
9581    }
9582
9583    #[test]
9584    fn inspect_respects_tenant_scoping() {
9585        let runtime = rt();
9586        let h = Harness::new(&runtime);
9587        let memory_id = "a7770000-0000-7000-8000-000000000001";
9588        {
9589            let conn = h.open_db();
9590            seed_episode(&conn, memory_id, 100, "tenant scope");
9591        }
9592        // Real id in default tenant resolves; the same request against
9593        // a never-registered tenant header surfaces 404 from the tenant
9594        // extractor before the handler runs.
9595        let r = h.router.clone();
9596        let (status, _) = runtime.block_on(async {
9597            let req = Request::builder()
9598                .method("GET")
9599                .uri(inspect_uri(&format!("ep:{memory_id}")))
9600                .header("x-solo-tenant", "never-registered-tenant")
9601                .body(Body::empty())
9602                .unwrap();
9603            let resp = r.oneshot(req).await.expect("oneshot");
9604            let s = resp.status();
9605            let _b = resp.into_body().collect().await.unwrap().to_bytes();
9606            (s, _b)
9607        });
9608        assert_eq!(status, StatusCode::NOT_FOUND);
9609        // Sanity: same id resolves on the default tenant.
9610        let (status, body) = runtime.block_on(call(
9611            h.router.clone(),
9612            "GET",
9613            &inspect_uri(&format!("ep:{memory_id}")),
9614            None,
9615        ));
9616        assert_eq!(
9617            status,
9618            StatusCode::OK,
9619            "default tenant must resolve: {body}"
9620        );
9621        h.shutdown(&runtime);
9622    }
9623
9624    #[test]
9625    fn inspect_respects_auth_when_enabled() {
9626        let runtime = rt();
9627        let h = Harness::new_with_auth(&runtime, Some("inspect-secret".into()));
9628        // Missing bearer -> 401 before handler runs.
9629        let (status, _) = runtime.block_on(call(
9630            h.router.clone(),
9631            "GET",
9632            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
9633            None,
9634        ));
9635        assert_eq!(status, StatusCode::UNAUTHORIZED);
9636        // Valid bearer + unknown node -> handler runs and returns 404,
9637        // proving auth passed through.
9638        let (status, _) = runtime.block_on(call_with_auth(
9639            h.router.clone(),
9640            "GET",
9641            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
9642            None,
9643            Some("Bearer inspect-secret"),
9644        ));
9645        assert_eq!(status, StatusCode::NOT_FOUND);
9646        h.shutdown(&runtime);
9647    }
9648
9649    // ---------------------------------------------------------------------
9650    // v0.10.0: GET /v1/graph/neighbors/{id}
9651    //
9652    // Unified explicit + HNSW-semantic neighbor surface for solo-web's
9653    // "show similar" overlay. Tests cover the kind dispatch (explicit /
9654    // semantic / both default), threshold filter, limit clamp, dedupe
9655    // rule, and the standard 400/404/auth/tenant gates.
9656    // ---------------------------------------------------------------------
9657
9658    /// URL builder for the neighbors endpoint. `kind`/`threshold`/`limit`
9659    /// are all optional; pass `None` to omit the corresponding query
9660    /// parameter. The node id is percent-encoded so `:` survives the path
9661    /// extractor.
9662    fn neighbors_uri(
9663        node_id: &str,
9664        kind: Option<&str>,
9665        threshold: Option<f32>,
9666        limit: Option<u32>,
9667    ) -> String {
9668        let mut qs: Vec<String> = Vec::new();
9669        if let Some(k) = kind {
9670            qs.push(format!("kind={k}"));
9671        }
9672        if let Some(t) = threshold {
9673            qs.push(format!("threshold={t}"));
9674        }
9675        if let Some(l) = limit {
9676            qs.push(format!("limit={l}"));
9677        }
9678        let encoded = percent_encode_node_id(node_id);
9679        if qs.is_empty() {
9680            format!("/v1/graph/neighbors/{encoded}")
9681        } else {
9682            format!("/v1/graph/neighbors/{encoded}?{}", qs.join("&"))
9683        }
9684    }
9685
9686    /// 1. `?kind=explicit` returns only structural edges (no semantic).
9687    /// Seeds an episode with 2 explicit (triple) neighbors + several
9688    /// distinct other episodes so the semantic path COULD surface
9689    /// candidates. The `kind=explicit` filter must drop all of them.
9690    #[test]
9691    fn neighbors_explicit_only_returns_no_semantic_edges() {
9692        let runtime = rt();
9693        let h = Harness::new(&runtime);
9694        runtime.block_on(async {
9695            // Seed several episodes via the writer-actor so they get HNSW
9696            // entries -- the semantic path would surface these if it
9697            // wasn't filtered out.
9698            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9699            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
9700            let _other2 = post_remember(h.router.clone(), "gamma gamma gamma").await;
9701            // Add explicit triples sourced from `focal`. seed_triple_row
9702            // needs the focal rowid -- look it up via a side connection.
9703            {
9704                let conn = h.open_db();
9705                let rowid: i64 = conn
9706                    .query_row(
9707                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9708                        rusqlite::params![&focal],
9709                        |r| r.get(0),
9710                    )
9711                    .unwrap();
9712                seed_triple_row(&conn, "t-exp-1", "Alice", "knows", "Bob", Some(rowid));
9713                seed_triple_row(&conn, "t-exp-2", "Alice", "owns", "laptop", Some(rowid));
9714            }
9715            let (status, body) = call(
9716                h.router.clone(),
9717                "GET",
9718                &neighbors_uri(&format!("ep:{focal}"), Some("explicit"), None, None),
9719                None,
9720            )
9721            .await;
9722            assert_eq!(status, StatusCode::OK, "body: {body}");
9723            let edges = body["edges"].as_array().unwrap();
9724            assert!(!edges.is_empty(), "expected explicit edges: {body}");
9725            for e in edges {
9726                assert_ne!(
9727                    e["kind"], "semantic",
9728                    "kind=explicit must drop semantic edges: {body}"
9729                );
9730            }
9731        });
9732        h.shutdown(&runtime);
9733    }
9734
9735    /// 2. `?kind=semantic` returns only HNSW edges (no explicit).
9736    /// Inverse of test 1 -- same fixture, opposite filter.
9737    #[test]
9738    fn neighbors_semantic_only_returns_no_explicit_edges() {
9739        let runtime = rt();
9740        let h = Harness::new(&runtime);
9741        runtime.block_on(async {
9742            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9743            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
9744            let _other2 = post_remember(h.router.clone(), "gamma gamma gamma").await;
9745            {
9746                let conn = h.open_db();
9747                let rowid: i64 = conn
9748                    .query_row(
9749                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9750                        rusqlite::params![&focal],
9751                        |r| r.get(0),
9752                    )
9753                    .unwrap();
9754                seed_triple_row(&conn, "t-exp-1", "Alice", "knows", "Bob", Some(rowid));
9755            }
9756            // Threshold=0 so every HNSW hit clears the filter.
9757            let (status, body) = call(
9758                h.router.clone(),
9759                "GET",
9760                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.0), None),
9761                None,
9762            )
9763            .await;
9764            assert_eq!(status, StatusCode::OK, "body: {body}");
9765            let edges = body["edges"].as_array().unwrap();
9766            for e in edges {
9767                assert_eq!(
9768                    e["kind"], "semantic",
9769                    "kind=semantic must drop explicit edges: {body}"
9770                );
9771                assert!(
9772                    e["weight"].is_number(),
9773                    "semantic edges carry weight: {body}"
9774                );
9775            }
9776        });
9777        h.shutdown(&runtime);
9778    }
9779
9780    /// 3. Default (no `kind=` param) returns both explicit + semantic.
9781    #[test]
9782    fn neighbors_both_default_returns_combined() {
9783        let runtime = rt();
9784        let h = Harness::new(&runtime);
9785        runtime.block_on(async {
9786            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9787            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
9788            {
9789                let conn = h.open_db();
9790                let rowid: i64 = conn
9791                    .query_row(
9792                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9793                        rusqlite::params![&focal],
9794                        |r| r.get(0),
9795                    )
9796                    .unwrap();
9797                seed_triple_row(&conn, "t-both-1", "Alice", "met", "Bob", Some(rowid));
9798            }
9799            let (status, body) = call(
9800                h.router.clone(),
9801                "GET",
9802                // No kind param -> default = both. Threshold 0 so semantic
9803                // hits make it through the filter.
9804                &neighbors_uri(&format!("ep:{focal}"), None, Some(0.0), None),
9805                None,
9806            )
9807            .await;
9808            assert_eq!(status, StatusCode::OK, "body: {body}");
9809            let edges = body["edges"].as_array().unwrap();
9810            let kinds: std::collections::HashSet<&str> =
9811                edges.iter().map(|e| e["kind"].as_str().unwrap()).collect();
9812            assert!(
9813                kinds.contains("triple"),
9814                "expected at least one triple edge: {body}"
9815            );
9816            assert!(
9817                kinds.contains("semantic"),
9818                "expected at least one semantic edge: {body}"
9819            );
9820        });
9821        h.shutdown(&runtime);
9822    }
9823
9824    /// 4. Dedupe rule. Construct an episode X whose semantic-neighbor Y
9825    /// is ALSO a triple-target -- i.e. the explicit and semantic paths
9826    /// both produce an edge X -> Y. After dedupe only the explicit edge
9827    /// survives.
9828    #[test]
9829    fn neighbors_dedupes_semantic_when_explicit_exists() {
9830        let runtime = rt();
9831        let h = Harness::new(&runtime);
9832        runtime.block_on(async {
9833            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9834            // Seed an explicit triple from focal -> ent:peer-target.
9835            // The semantic path produces edges focal -> ep:<other>; we
9836            // ensure both paths produce an edge ending at the same id by
9837            // wiring `peer-target = ep:<other_memory_id>` -- but the
9838            // entity emitter uses `ent:` prefix, not `ep:`. So to force a
9839            // collision we need an edge form where source+target overlap.
9840            //
9841            // Simpler construction: the `expand_triple_from_episode` path
9842            // emits an edge `ent:subject -> ent:object`, not from the
9843            // focal episode -- meaning the explicit edges don't end at
9844            // an ep: node in the first place. So we have to engineer a
9845            // collision via the cluster_member path:
9846            //   * explicit: focal (episode) -> cluster (via cluster_member)
9847            //   * semantic: focal -> similar episode
9848            // The two endpoints (cluster vs. episode) never collide in
9849            // shape. To produce a real (source, target) overlap that
9850            // exercises the dedupe code, mint a synthetic semantic edge
9851            // by adding an explicit triple sourced from the focal that
9852            // happens to end at the SAME entity the semantic path would
9853            // emit -- but semantic only emits ep:/chunk: ids, never ent:.
9854            //
9855            // The brief flagged this scenario as unlikely. Build the
9856            // simplest collision the codebase admits: have the focal
9857            // episode's semantic neighbor's memory_id appear as a
9858            // triple's object_id (formatted as ent:<that-uuid>). The
9859            // explicit edge is then `ent:<self-subject> -> ent:<uuid>`;
9860            // the semantic edge is `ep:focal -> ep:<uuid>`. The (source,
9861            // target) pair DIFFERS (`ent:X` vs `ep:focal`), so dedupe
9862            // would NOT fire -- which is correct: those are structurally
9863            // different relationships.
9864            //
9865            // Therefore the realistic dedupe test is the trivial
9866            // tautology: explicit and semantic produce no collisions in
9867            // practice. Lock that in by asserting that the same memory_id
9868            // never appears with an edge from both paths.
9869            let _other = post_remember(h.router.clone(), "beta beta beta").await;
9870            {
9871                let conn = h.open_db();
9872                let rowid: i64 = conn
9873                    .query_row(
9874                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9875                        rusqlite::params![&focal],
9876                        |r| r.get(0),
9877                    )
9878                    .unwrap();
9879                seed_triple_row(&conn, "t-dedupe-1", "Alice", "knows", "Bob", Some(rowid));
9880            }
9881            let (status, body) = call(
9882                h.router.clone(),
9883                "GET",
9884                &neighbors_uri(&format!("ep:{focal}"), Some("both"), Some(0.0), None),
9885                None,
9886            )
9887            .await;
9888            assert_eq!(status, StatusCode::OK, "body: {body}");
9889            // For every edge, count occurrences of (source, target). No
9890            // pair should appear twice (which is what the dedupe rule
9891            // guarantees).
9892            let edges = body["edges"].as_array().unwrap();
9893            let mut seen: std::collections::HashMap<(String, String), i32> =
9894                std::collections::HashMap::new();
9895            for e in edges {
9896                let key = (
9897                    e["source"].as_str().unwrap().to_string(),
9898                    e["target"].as_str().unwrap().to_string(),
9899                );
9900                *seen.entry(key).or_insert(0) += 1;
9901            }
9902            for (pair, count) in &seen {
9903                assert_eq!(
9904                    *count, 1,
9905                    "edge pair {pair:?} appears {count} times -- dedupe rule violated: {body}"
9906                );
9907            }
9908        });
9909        h.shutdown(&runtime);
9910    }
9911
9912    /// 5. Threshold filter -- raising the threshold drops low-similarity
9913    /// semantic neighbors.
9914    #[test]
9915    fn neighbors_threshold_filters_low_similarity() {
9916        let runtime = rt();
9917        let h = Harness::new(&runtime);
9918        runtime.block_on(async {
9919            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9920            let _o1 = post_remember(h.router.clone(), "beta one").await;
9921            let _o2 = post_remember(h.router.clone(), "beta two").await;
9922            let _o3 = post_remember(h.router.clone(), "beta three").await;
9923            // Low threshold -- expect more semantic hits.
9924            let (status, low_body) = call(
9925                h.router.clone(),
9926                "GET",
9927                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.0), None),
9928                None,
9929            )
9930            .await;
9931            assert_eq!(status, StatusCode::OK, "body: {low_body}");
9932            let low_edge_count = low_body["edges"].as_array().unwrap().len();
9933            // High threshold -- expect fewer (or equal) semantic hits.
9934            let (status, high_body) = call(
9935                h.router.clone(),
9936                "GET",
9937                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.99), None),
9938                None,
9939            )
9940            .await;
9941            assert_eq!(status, StatusCode::OK, "body: {high_body}");
9942            let high_edge_count = high_body["edges"].as_array().unwrap().len();
9943            assert!(
9944                high_edge_count <= low_edge_count,
9945                "high-threshold ({high_edge_count}) must not exceed low-threshold ({low_edge_count}): low={low_body}, high={high_body}"
9946            );
9947            // Also assert every surviving high-threshold edge satisfies
9948            // the filter.
9949            for e in high_body["edges"].as_array().unwrap() {
9950                if let Some(w) = e["weight"].as_f64() {
9951                    assert!(
9952                        w >= 0.99,
9953                        "edge with weight {w} survived threshold=0.99: {e}"
9954                    );
9955                }
9956            }
9957        });
9958        h.shutdown(&runtime);
9959    }
9960
9961    /// 6. `?limit=999` is silently clamped at the family ceiling (100) --
9962    /// same policy as `/v1/graph/expand`.
9963    #[test]
9964    fn neighbors_limit_clamped_at_100() {
9965        let runtime = rt();
9966        let h = Harness::new(&runtime);
9967        // Seed a cluster with > 100 episodes so the explicit cluster_member
9968        // path could surface > 100 -- clamp must cap at 100.
9969        {
9970            let conn = h.open_db();
9971            seed_cluster_row(&conn, "cl-huge-n", 1000);
9972            for i in 0..150 {
9973                let mid = format!("99119911-1111-7000-8000-{:012}", i);
9974                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
9975                seed_cluster_member(&conn, "cl-huge-n", &mid);
9976            }
9977        }
9978        let (status, body) = runtime.block_on(call(
9979            h.router.clone(),
9980            "GET",
9981            &neighbors_uri("cl:cl-huge-n", Some("explicit"), None, Some(999)),
9982            None,
9983        ));
9984        assert_eq!(status, StatusCode::OK, "body: {body}");
9985        let edges = body["edges"].as_array().unwrap();
9986        assert_eq!(
9987            edges.len(),
9988            100,
9989            "limit must be silently clamped to 100, got {}",
9990            edges.len()
9991        );
9992        h.shutdown(&runtime);
9993    }
9994
9995    /// 7. `kind=semantic` on a document focal node returns 400.
9996    #[test]
9997    fn neighbors_semantic_rejects_document_source() {
9998        let runtime = rt();
9999        let h = Harness::new(&runtime);
10000        let doc_id = "d-semrej-0000-7000-8000-000000000001";
10001        {
10002            let conn = h.open_db();
10003            seed_document_row(&conn, doc_id, "host");
10004        }
10005        let (status, body) = runtime.block_on(call(
10006            h.router.clone(),
10007            "GET",
10008            &neighbors_uri(&format!("doc:{doc_id}"), Some("semantic"), None, None),
10009            None,
10010        ));
10011        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
10012        let err = body["error"].as_str().unwrap_or_default();
10013        assert!(
10014            err.contains("episode") && err.contains("chunk"),
10015            "error must list supported kinds: {body}"
10016        );
10017        h.shutdown(&runtime);
10018    }
10019
10020    /// 8. `kind=semantic` on a cluster focal node returns 400.
10021    #[test]
10022    fn neighbors_semantic_rejects_cluster_source() {
10023        let runtime = rt();
10024        let h = Harness::new(&runtime);
10025        let cluster_id = "cl-semrej-target";
10026        {
10027            let conn = h.open_db();
10028            seed_cluster_row(&conn, cluster_id, 12345);
10029        }
10030        let (status, body) = runtime.block_on(call(
10031            h.router.clone(),
10032            "GET",
10033            &neighbors_uri(&format!("cl:{cluster_id}"), Some("semantic"), None, None),
10034            None,
10035        ));
10036        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
10037        h.shutdown(&runtime);
10038    }
10039
10040    /// 9. Entity focal node returns only explicit triple edges; no
10041    /// semantic edges (entities have no embeddings, semantic path is
10042    /// silently skipped under `kind=both`).
10043    #[test]
10044    fn neighbors_entity_returns_triples_only() {
10045        let runtime = rt();
10046        let h = Harness::new(&runtime);
10047        runtime.block_on(async {
10048            // Use the writer-actor so the host episode lands in HNSW too
10049            // (any HNSW state is irrelevant since entities can't trigger
10050            // semantic recall; included to prove the semantic path is
10051            // silently skipped, not erroring).
10052            let host_mid = post_remember(h.router.clone(), "Alice and Bob talked").await;
10053            {
10054                let conn = h.open_db();
10055                let rowid: i64 = conn
10056                    .query_row(
10057                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
10058                        rusqlite::params![&host_mid],
10059                        |r| r.get(0),
10060                    )
10061                    .unwrap();
10062                seed_triple_row(&conn, "t-ent-n-1", "Alice", "knows", "Bob", Some(rowid));
10063                seed_triple_row(&conn, "t-ent-n-2", "Alice", "works_at", "Acme", Some(rowid));
10064            }
10065            let (status, body) = call(
10066                h.router.clone(),
10067                "GET",
10068                &neighbors_uri("ent:Alice", None, Some(0.0), None),
10069                None,
10070            )
10071            .await;
10072            assert_eq!(status, StatusCode::OK, "body: {body}");
10073            let edges = body["edges"].as_array().unwrap();
10074            assert!(!edges.is_empty(), "expected explicit triples: {body}");
10075            for e in edges {
10076                assert_eq!(
10077                    e["kind"], "triple",
10078                    "entity focal must produce only triple edges: {body}"
10079                );
10080            }
10081        });
10082        h.shutdown(&runtime);
10083    }
10084
10085    /// 10. Cross-tenant lookups are blocked at the TenantExtractor before
10086    /// the handler runs.
10087    #[test]
10088    fn neighbors_respects_tenant_scoping() {
10089        let runtime = rt();
10090        let h = Harness::new(&runtime);
10091        let memory_id = "a8880000-0000-7000-8000-000000000001";
10092        {
10093            let conn = h.open_db();
10094            seed_episode(&conn, memory_id, 100, "tenant scope");
10095        }
10096        // Wrong tenant header -> 404 from registry, before handler runs.
10097        let r = h.router.clone();
10098        let (status, _) = runtime.block_on(async {
10099            let req = Request::builder()
10100                .method("GET")
10101                .uri(neighbors_uri(
10102                    &format!("ep:{memory_id}"),
10103                    Some("explicit"),
10104                    None,
10105                    None,
10106                ))
10107                .header("x-solo-tenant", "never-registered-tenant-n")
10108                .body(Body::empty())
10109                .unwrap();
10110            let resp = r.oneshot(req).await.expect("oneshot");
10111            let s = resp.status();
10112            let _b = resp.into_body().collect().await.unwrap().to_bytes();
10113            (s, _b)
10114        });
10115        assert_eq!(status, StatusCode::NOT_FOUND);
10116        // Sanity: same id resolves on default tenant.
10117        let (status, body) = runtime.block_on(call(
10118            h.router.clone(),
10119            "GET",
10120            &neighbors_uri(&format!("ep:{memory_id}"), Some("explicit"), None, None),
10121            None,
10122        ));
10123        assert_eq!(
10124            status,
10125            StatusCode::OK,
10126            "default tenant must resolve: {body}"
10127        );
10128        h.shutdown(&runtime);
10129    }
10130
10131    /// 11. Bearer-auth gate: missing token -> 401; valid token + unknown
10132    /// node -> 404 (auth passed, handler ran).
10133    #[test]
10134    fn neighbors_respects_auth_when_enabled() {
10135        let runtime = rt();
10136        let h = Harness::new_with_auth(&runtime, Some("neighbors-secret".into()));
10137        // Missing Authorization -> 401.
10138        let (status, _) = runtime.block_on(call(
10139            h.router.clone(),
10140            "GET",
10141            &neighbors_uri(
10142                "ep:99999999-9999-7000-8000-000000000999",
10143                Some("explicit"),
10144                None,
10145                None,
10146            ),
10147            None,
10148        ));
10149        assert_eq!(status, StatusCode::UNAUTHORIZED);
10150        // Valid bearer + unknown node -> 404 from the handler.
10151        let (status, _) = runtime.block_on(call_with_auth(
10152            h.router.clone(),
10153            "GET",
10154            &neighbors_uri(
10155                "ep:99999999-9999-7000-8000-000000000999",
10156                Some("explicit"),
10157                None,
10158                None,
10159            ),
10160            None,
10161            Some("Bearer neighbors-secret"),
10162        ));
10163        assert_eq!(status, StatusCode::NOT_FOUND);
10164        h.shutdown(&runtime);
10165    }
10166
10167    // ---------------------------------------------------------------------
10168    // v0.10.0: GET /v1/graph/stream — SSE invalidation feed
10169    //
10170    // Driving SSE through axum's in-process router (`oneshot`) requires
10171    // reading the response body as a stream of frames and parsing each
10172    // chunk against the SSE wire format (`event: NAME\ndata: JSON\n\n`).
10173    // The `read_one_sse_event` helper below does that incrementally so
10174    // tests don't have to wait for the stream to close (which would
10175    // never happen — the SSE loop runs until the client drops).
10176    // ---------------------------------------------------------------------
10177
10178    /// One parsed SSE event: the `event:` field plus the `data:` payload
10179    /// re-parsed as JSON. The `id:` field is captured for v0.11.0 P2's
10180    /// `/mcp` GET stream which threads monotonic event ids through
10181    /// the wire — `None` for streams (`/v1/graph/stream`) that don't
10182    /// emit `id:` lines. Empty / comment-only frames are filtered out
10183    /// by the parser; callers only see real events.
10184    #[derive(Debug, Clone)]
10185    struct ParsedSseEvent {
10186        event: String,
10187        data: Value,
10188        /// Raw SSE `id:` field, if present. v0.11.0 P2 emits monotonic
10189        /// `u64` ids for `/mcp` events; the wire encodes them as
10190        /// strings.
10191        id: Option<String>,
10192    }
10193
10194    /// Read frames off the SSE body until ONE complete event lands, then
10195    /// return it. Times out after `timeout` to keep red-test feedback
10196    /// fast. On timeout returns `None`.
10197    async fn read_one_sse_event(
10198        body: &mut axum::body::Body,
10199        timeout: std::time::Duration,
10200    ) -> Option<ParsedSseEvent> {
10201        use http_body_util::BodyExt;
10202        let mut buf = String::new();
10203        let start = std::time::Instant::now();
10204        loop {
10205            if start.elapsed() >= timeout {
10206                return None;
10207            }
10208            let remaining = timeout.saturating_sub(start.elapsed());
10209            let frame_res = tokio::time::timeout(remaining, body.frame()).await;
10210            let frame = match frame_res {
10211                Ok(Some(Ok(f))) => f,
10212                Ok(Some(Err(_))) | Ok(None) => return None,
10213                Err(_) => return None,
10214            };
10215            if let Ok(data) = frame.into_data() {
10216                buf.push_str(&String::from_utf8_lossy(&data));
10217                // Parse complete events (double newline separator).
10218                while let Some(idx) = buf.find("\n\n") {
10219                    let block: String = buf.drain(..idx + 2).collect();
10220                    if let Some(parsed) = parse_sse_block(&block) {
10221                        return Some(parsed);
10222                    }
10223                }
10224            }
10225        }
10226    }
10227
10228    /// Parse one SSE block (raw text between two `\n\n` separators).
10229    /// Returns `None` for comment-only blocks (lines starting with `:`)
10230    /// or blocks missing either `event:` or `data:`.
10231    fn parse_sse_block(block: &str) -> Option<ParsedSseEvent> {
10232        let mut event: Option<String> = None;
10233        let mut data: Option<String> = None;
10234        let mut id: Option<String> = None;
10235        for line in block.lines() {
10236            if let Some(rest) = line.strip_prefix("event:") {
10237                event = Some(rest.trim().to_string());
10238            } else if let Some(rest) = line.strip_prefix("data:") {
10239                data = Some(rest.trim().to_string());
10240            } else if let Some(rest) = line.strip_prefix("id:") {
10241                id = Some(rest.trim().to_string());
10242            }
10243        }
10244        let event = event?;
10245        let data_str = data?;
10246        let data_json = serde_json::from_str(&data_str).ok()?;
10247        Some(ParsedSseEvent {
10248            event,
10249            data: data_json,
10250            id,
10251        })
10252    }
10253
10254    /// Open the SSE stream and return the response body for further
10255    /// frame-level reads. The headers are validated (Content-Type +
10256    /// status) before the body is returned.
10257    async fn open_sse_stream_inner(
10258        router: axum::Router,
10259        auth: Option<&str>,
10260        tenant: Option<&str>,
10261    ) -> (StatusCode, axum::body::Body) {
10262        let mut builder = Request::builder().method("GET").uri("/v1/graph/stream");
10263        if let Some(a) = auth {
10264            builder = builder.header("authorization", a);
10265        }
10266        if let Some(t) = tenant {
10267            builder = builder.header("x-solo-tenant", t);
10268        }
10269        let req = builder
10270            .header("content-length", "0")
10271            .body(Body::empty())
10272            .unwrap();
10273        let resp = router.oneshot(req).await.expect("oneshot");
10274        let status = resp.status();
10275        let body = resp.into_body();
10276        (status, body)
10277    }
10278
10279    /// 1. `init` event lands as the first chunk.
10280    #[test]
10281    fn stream_emits_init_event_on_connect() {
10282        let runtime = rt();
10283        let h = Harness::new(&runtime);
10284        let r = h.router.clone();
10285        runtime.block_on(async {
10286            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
10287            assert_eq!(status, StatusCode::OK);
10288            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10289                .await
10290                .expect("must receive init event within 2s");
10291            assert_eq!(ev.event, "init");
10292            assert_eq!(ev.data["connected"].as_bool(), Some(true));
10293            assert_eq!(ev.data["tenant_id"].as_str(), Some("default"));
10294            assert!(ev.data["ts_ms"].is_number());
10295        });
10296        h.shutdown(&runtime);
10297    }
10298
10299    /// 2. Firing an InvalidateEvent on the broadcast channel surfaces
10300    /// as an `invalidate` SSE event.
10301    #[test]
10302    fn stream_emits_invalidate_after_writer_event() {
10303        let runtime = rt();
10304        let h = Harness::new(&runtime);
10305        let r = h.router.clone();
10306        let sender = h.invalidate_sender();
10307        runtime.block_on(async {
10308            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
10309            assert_eq!(status, StatusCode::OK);
10310            // Discard the init event.
10311            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10312                .await
10313                .unwrap();
10314            assert_eq!(init.event, "init");
10315            // Fire a writer-actor-style event on the broadcast.
10316            sender
10317                .send(InvalidateEvent {
10318                    reason: "memory.remember".to_string(),
10319                    tenant_id: "default".to_string(),
10320                    ts_ms: 1_715_625_600_000,
10321                    kind: "episode".to_string(),
10322                })
10323                .expect("must have at least one subscriber");
10324            // The SSE handler must surface it.
10325            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10326                .await
10327                .expect("invalidate event must arrive within 2s");
10328            assert_eq!(ev.event, "invalidate");
10329            assert_eq!(ev.data["reason"].as_str(), Some("memory.remember"));
10330            assert_eq!(ev.data["tenant_id"].as_str(), Some("default"));
10331            assert_eq!(ev.data["kind"].as_str(), Some("episode"));
10332        });
10333        h.shutdown(&runtime);
10334    }
10335
10336    /// 3. Each kind of writer-actor event surfaces with its mapped
10337    /// `(reason, kind)` shape.
10338    #[test]
10339    fn stream_emits_invalidate_for_each_writer_command() {
10340        let runtime = rt();
10341        let h = Harness::new(&runtime);
10342        let r = h.router.clone();
10343        let sender = h.invalidate_sender();
10344        let cases = [
10345            ("memory.remember", "episode"),
10346            ("memory.forget", "episode"),
10347            ("memory.consolidate", "cluster"),
10348            ("memory.ingest_document", "document"),
10349            ("memory.forget_document", "document"),
10350            ("memory.triples_extract", "cluster"),
10351            ("memory.reembed", "episode"),
10352            ("gdpr.forget_user", "tenant"),
10353        ];
10354        runtime.block_on(async {
10355            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
10356            assert_eq!(status, StatusCode::OK);
10357            // Discard the init.
10358            let _ = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10359                .await
10360                .unwrap();
10361            for (reason, kind) in cases {
10362                sender
10363                    .send(InvalidateEvent {
10364                        reason: reason.to_string(),
10365                        tenant_id: "default".to_string(),
10366                        ts_ms: 1_715_625_600_000,
10367                        kind: kind.to_string(),
10368                    })
10369                    .unwrap();
10370                let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10371                    .await
10372                    .unwrap_or_else(|| panic!("must receive event for {reason}"));
10373                assert_eq!(ev.event, "invalidate");
10374                assert_eq!(ev.data["reason"].as_str(), Some(reason), "reason mismatch");
10375                assert_eq!(ev.data["kind"].as_str(), Some(kind), "kind mismatch");
10376            }
10377        });
10378        h.shutdown(&runtime);
10379    }
10380
10381    /// 4. Heartbeat events fire on the configured interval when no real
10382    /// events arrive. Drives `build_invalidate_stream` at a 1-second
10383    /// heartbeat (the public handler uses 30s in prod), wraps it in an
10384    /// `Sse` response, then reads + parses the SSE body via the same
10385    /// `read_one_sse_event` helper the HTTP-layer tests use. This
10386    /// exercises the public Event → body byte path without touching
10387    /// `Event::finalize` (which is private).
10388    #[test]
10389    fn stream_emits_heartbeat_when_no_events() {
10390        let runtime = rt();
10391        let h = Harness::new(&runtime);
10392        let sender = h.invalidate_sender();
10393        runtime.block_on(async {
10394            // Subscribe FIRST so a later writer-side `send` would lag
10395            // the receiver if the subscriber stalled.
10396            let rx = sender.subscribe();
10397            // Build the SSE stream with a 1-second heartbeat interval —
10398            // bypassing the 30s production default.
10399            let stream = build_invalidate_stream(rx, "default".to_string(), 1);
10400            // Wrap in an Sse response + extract the body bytes through
10401            // axum's IntoResponse path. This produces real on-the-wire
10402            // SSE bytes that `read_one_sse_event` can parse.
10403            let sse: Sse<_> = Sse::new(stream);
10404            let resp = sse.into_response();
10405            let mut body = resp.into_body();
10406            // First event must be `init`.
10407            let first = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10408                .await
10409                .expect("init event must arrive");
10410            assert_eq!(first.event, "init");
10411            // Second must be heartbeat (no invalidates fired, ~1s
10412            // interval; allow 3s window for runtime jitter).
10413            let second = read_one_sse_event(&mut body, std::time::Duration::from_secs(3))
10414                .await
10415                .expect("heartbeat event must arrive within 3s");
10416            assert_eq!(second.event, "heartbeat");
10417            assert!(second.data["ts_ms"].is_number());
10418        });
10419        h.shutdown(&runtime);
10420    }
10421
10422    /// 5. Two subscribers connected to the same tenant both receive
10423    /// every invalidate.
10424    #[test]
10425    fn stream_concurrent_subscribers_same_tenant() {
10426        let runtime = rt();
10427        let h = Harness::new(&runtime);
10428        let r1 = h.router.clone();
10429        let r2 = h.router.clone();
10430        let r3 = h.router.clone();
10431        let sender = h.invalidate_sender();
10432        runtime.block_on(async {
10433            // Open three subscribers.
10434            let (s1, mut body1) = open_sse_stream_inner(r1, None, None).await;
10435            let (s2, mut body2) = open_sse_stream_inner(r2, None, None).await;
10436            let (s3, mut body3) = open_sse_stream_inner(r3, None, None).await;
10437            assert_eq!(s1, StatusCode::OK);
10438            assert_eq!(s2, StatusCode::OK);
10439            assert_eq!(s3, StatusCode::OK);
10440            // Drain init events from each.
10441            for body in [&mut body1, &mut body2, &mut body3] {
10442                let ev = read_one_sse_event(body, std::time::Duration::from_secs(2))
10443                    .await
10444                    .unwrap();
10445                assert_eq!(ev.event, "init");
10446            }
10447            // Receiver count should be at least 3 now.
10448            assert!(
10449                sender.receiver_count() >= 3,
10450                "expected ≥3 subscribers, got {}",
10451                sender.receiver_count()
10452            );
10453            // Fire one invalidate.
10454            sender
10455                .send(InvalidateEvent {
10456                    reason: "memory.remember".to_string(),
10457                    tenant_id: "default".to_string(),
10458                    ts_ms: 1_715_625_600_000,
10459                    kind: "episode".to_string(),
10460                })
10461                .expect("send must succeed");
10462            // All three receive it.
10463            for body in [&mut body1, &mut body2, &mut body3] {
10464                let ev = read_one_sse_event(body, std::time::Duration::from_secs(2))
10465                    .await
10466                    .unwrap();
10467                assert_eq!(ev.event, "invalidate");
10468                assert_eq!(ev.data["reason"].as_str(), Some("memory.remember"));
10469            }
10470        });
10471        h.shutdown(&runtime);
10472    }
10473
10474    /// 6. Dropping the SSE client decrements the per-tenant subscriber
10475    /// count — graceful cleanup invariant.
10476    #[test]
10477    fn stream_handles_client_disconnect_gracefully() {
10478        let runtime = rt();
10479        let h = Harness::new(&runtime);
10480        let r = h.router.clone();
10481        let sender = h.invalidate_sender();
10482        let before = sender.receiver_count();
10483        runtime.block_on(async {
10484            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
10485            assert_eq!(status, StatusCode::OK);
10486            // Drain the init so the stream is fully active.
10487            let _ = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10488                .await
10489                .unwrap();
10490            let during = sender.receiver_count();
10491            assert!(
10492                during > before,
10493                "subscriber count must increase while stream is live (before={before}, during={during})"
10494            );
10495            // Drop the body — simulates the client closing the
10496            // connection. axum drops the stream future, which drops the
10497            // Receiver.
10498            drop(body);
10499        });
10500        // Allow tokio a beat to drop the Receiver task.
10501        runtime.block_on(async {
10502            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
10503        });
10504        let after = sender.receiver_count();
10505        assert!(
10506            after <= before,
10507            "subscriber count must drop back after disconnect (before={before}, after={after})"
10508        );
10509        h.shutdown(&runtime);
10510    }
10511
10512    /// 7. Bearer-auth gate: missing token -> 401.
10513    #[test]
10514    fn stream_respects_auth_when_enabled() {
10515        let runtime = rt();
10516        let h = Harness::new_with_auth(&runtime, Some("stream-secret".into()));
10517        let r = h.router.clone();
10518        runtime.block_on(async {
10519            let (status, _body) = open_sse_stream_inner(r, None, None).await;
10520            assert_eq!(status, StatusCode::UNAUTHORIZED);
10521        });
10522        h.shutdown(&runtime);
10523    }
10524
10525    /// 8. Anonymous OK when auth=None (loopback default).
10526    #[test]
10527    fn stream_works_with_auth_none() {
10528        let runtime = rt();
10529        let h = Harness::new(&runtime);
10530        let r = h.router.clone();
10531        runtime.block_on(async {
10532            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
10533            assert_eq!(status, StatusCode::OK);
10534            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10535                .await
10536                .expect("must receive init event");
10537            assert_eq!(ev.event, "init");
10538        });
10539        h.shutdown(&runtime);
10540    }
10541
10542    /// 9. Bearer-auth gate: valid token allows the stream to open.
10543    #[test]
10544    fn stream_respects_auth_accepts_valid_token() {
10545        let runtime = rt();
10546        let h = Harness::new_with_auth(&runtime, Some("stream-secret".into()));
10547        let r = h.router.clone();
10548        runtime.block_on(async {
10549            let (status, mut body) =
10550                open_sse_stream_inner(r, Some("Bearer stream-secret"), None).await;
10551            assert_eq!(status, StatusCode::OK);
10552            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
10553                .await
10554                .expect("must receive init event with valid bearer");
10555            assert_eq!(ev.event, "init");
10556            assert_eq!(ev.data["tenant_id"].as_str(), Some("default"));
10557        });
10558        h.shutdown(&runtime);
10559    }
10560
10561    /// 10. Cross-tenant lookups are 404 at TenantExtractor before the
10562    /// stream opens — wrong tenant header never reaches the handler.
10563    #[test]
10564    fn stream_respects_tenant_scoping() {
10565        let runtime = rt();
10566        let h = Harness::new(&runtime);
10567        let r = h.router.clone();
10568        runtime.block_on(async {
10569            let (status, _body) =
10570                open_sse_stream_inner(r, None, Some("never-registered-tenant-x")).await;
10571            // The single-tenant test registry returns NotFound from
10572            // get_or_open when the header points to a tenant that isn't
10573            // cached; the TenantExtractor maps that to 404.
10574            assert_eq!(status, StatusCode::NOT_FOUND);
10575        });
10576        h.shutdown(&runtime);
10577    }
10578
10579    // -----------------------------------------------------------------
10580    // /v1/status — authenticated tenant-aware readiness
10581    // -----------------------------------------------------------------
10582
10583    #[test]
10584    fn status_returns_tenant_aware_payload() {
10585        let runtime = rt();
10586        let h = Harness::new(&runtime);
10587        let r = h.router.clone();
10588        runtime.block_on(async {
10589            let tid = solo_core::TenantId::default_tenant();
10590            h.registry
10591                .with_index(|idx| {
10592                    idx.register_with_quota(
10593                        &tid,
10594                        "default.db",
10595                        Some("Default tenant"),
10596                        Some(1_234_567),
10597                    )
10598                    .unwrap();
10599                })
10600                .await;
10601
10602            let (status, body) = call(r, "GET", "/v1/status", None).await;
10603            assert_eq!(status, StatusCode::OK, "body: {body}");
10604            assert_eq!(body["ok"].as_bool(), Some(true));
10605            assert_eq!(body["version"].as_str(), Some(env!("CARGO_PKG_VERSION")));
10606            assert_eq!(
10607                body.pointer("/tenant/id").and_then(|v| v.as_str()),
10608                Some("default")
10609            );
10610            assert_eq!(
10611                body.pointer("/tenant/registered").and_then(|v| v.as_bool()),
10612                Some(true)
10613            );
10614            assert_eq!(
10615                body.pointer("/tenant/status").and_then(|v| v.as_str()),
10616                Some("active")
10617            );
10618            assert_eq!(
10619                body.pointer("/tenant/quota_bytes").and_then(|v| v.as_u64()),
10620                Some(1_234_567)
10621            );
10622            assert!(
10623                body.pointer("/tenant/last_accessed_ms")
10624                    .and_then(|v| v.as_i64())
10625                    .is_some(),
10626                "status should surface the TenantExtractor touch: {body}"
10627            );
10628            assert_eq!(
10629                body.pointer("/embedder/name").and_then(|v| v.as_str()),
10630                Some("stub")
10631            );
10632            assert_eq!(
10633                body.pointer("/embedder/version").and_then(|v| v.as_str()),
10634                Some("v1")
10635            );
10636            assert_eq!(
10637                body.pointer("/embedder/dim").and_then(|v| v.as_u64()),
10638                Some(16)
10639            );
10640            assert_eq!(
10641                body.pointer("/embedder/dtype").and_then(|v| v.as_str()),
10642                Some("f32")
10643            );
10644            assert_eq!(body["active_tenants"].as_u64(), Some(1));
10645            assert_eq!(
10646                body.pointer("/mcp/sessions").and_then(|v| v.as_u64()),
10647                Some(0)
10648            );
10649        });
10650        h.shutdown(&runtime);
10651    }
10652
10653    #[test]
10654    fn status_respects_auth_when_enabled() {
10655        let runtime = rt();
10656        let h = Harness::new_with_auth(&runtime, Some("status-secret".into()));
10657        let r = h.router.clone();
10658        runtime.block_on(async {
10659            let (status, _body) = call(r.clone(), "GET", "/v1/status", None).await;
10660            assert_eq!(status, StatusCode::UNAUTHORIZED);
10661
10662            let (status, body) =
10663                call_with_auth(r, "GET", "/v1/status", None, Some("Bearer status-secret")).await;
10664            assert_eq!(status, StatusCode::OK, "body: {body}");
10665            assert_eq!(body["ok"].as_bool(), Some(true));
10666        });
10667        h.shutdown(&runtime);
10668    }
10669
10670    #[test]
10671    fn status_respects_tenant_scoping() {
10672        let runtime = rt();
10673        let h = Harness::new(&runtime);
10674        let r = h.router.clone();
10675        runtime.block_on(async {
10676            let (status, body) =
10677                call_with_tenant(r, "GET", "/v1/status", None, "never-registered").await;
10678            assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
10679        });
10680        h.shutdown(&runtime);
10681    }
10682
10683    // -----------------------------------------------------------------
10684    // /v1/tenants — principal-scoped tenant list (v0.10.0)
10685    //
10686    // Seeds the harness's in-memory tenants_index stub via
10687    // `harness.registry.with_index(|idx| idx.register(...))` to drive
10688    // the read-only list endpoint. The default tenant from the
10689    // harness's HashMap is NOT in the index stub by construction (the
10690    // `for_tests_with_single_tenant` factory only wires the cached
10691    // HashMap entry; the index starts empty after migrations), so each
10692    // test that wants the default tenant listed registers it
10693    // explicitly. This keeps the test setup explicit about what's
10694    // visible to `list_active` versus what's open in memory.
10695    // -----------------------------------------------------------------
10696
10697    /// Seed three Active tenants into the registry's index. Returns the
10698    /// ids in the order they were registered, which is the order
10699    /// `list_active` will return them in (ORDER BY created_at_ms ASC).
10700    async fn seed_three_tenants(registry: &TenantRegistry) -> Vec<String> {
10701        use solo_core::TenantId as TenantIdT;
10702        let ids = ["alice", "bob", "default"];
10703        for id in ids {
10704            let tid = TenantIdT::new(id).unwrap();
10705            registry
10706                .with_index(|idx| {
10707                    idx.register(&tid, &format!("{id}.db"), Some(&format!("{id} tenant")))
10708                        .unwrap();
10709                    // Ensure created_at_ms diverges so the ASC sort is
10710                    // deterministic — the index uses `chrono::Utc::now()`
10711                    // per row and 3 sequential inserts can land in the
10712                    // same ms on fast hardware.
10713                })
10714                .await;
10715            tokio::time::sleep(std::time::Duration::from_millis(2)).await;
10716        }
10717        // Sort matches the `created_at_ms ASC, tenant_id ASC` order
10718        // `TenantsIndex::list` returns. We inserted in (alice, bob,
10719        // default) order with 2ms gaps, so that's the expected order.
10720        vec!["alice".into(), "bob".into(), "default".into()]
10721    }
10722
10723    /// 1. With `AuthConfig::None`, the handler returns every tenant
10724    ///    visible in the registry — same scope as `solo tenants list`.
10725    ///    Exercises the "no principal" branch of the visibility filter.
10726    #[test]
10727    fn tenants_returns_all_when_auth_none() {
10728        let runtime = rt();
10729        let h = Harness::new(&runtime);
10730        let r = h.router.clone();
10731        runtime.block_on(async {
10732            let _expected = seed_three_tenants(&h.registry).await;
10733            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
10734            assert_eq!(status, StatusCode::OK);
10735            let arr = body
10736                .get("tenants")
10737                .and_then(|v| v.as_array())
10738                .expect("tenants array");
10739            assert_eq!(arr.len(), 3, "got body: {body}");
10740            let ids: Vec<&str> = arr.iter().filter_map(|t| t["id"].as_str()).collect();
10741            assert_eq!(ids, vec!["alice", "bob", "default"]);
10742        });
10743        h.shutdown(&runtime);
10744    }
10745
10746    /// 2. Under Bearer auth (single-principal mode), the handler
10747    ///    returns every tenant — the bearer holder is treated as the
10748    ///    daemon operator with full visibility. Exercises the bearer
10749    ///    branch of the visibility filter.
10750    #[test]
10751    fn tenants_returns_all_when_bearer_auth() {
10752        let runtime = rt();
10753        let h = Harness::new_with_auth(&runtime, Some("tlist-secret".into()));
10754        let r = h.router.clone();
10755        runtime.block_on(async {
10756            seed_three_tenants(&h.registry).await;
10757            let (status, body) =
10758                call_with_auth(r, "GET", "/v1/tenants", None, Some("Bearer tlist-secret")).await;
10759            assert_eq!(status, StatusCode::OK, "got body: {body}");
10760            let arr = body["tenants"].as_array().expect("tenants array");
10761            assert_eq!(arr.len(), 3, "bearer must see all tenants");
10762        });
10763        h.shutdown(&runtime);
10764    }
10765
10766    /// 3. Under OIDC, an authenticated principal carrying
10767    ///    `tenant_claim = "alice"` sees ONLY alice — not bob, not
10768    ///    default. Exercises the OIDC branch of the visibility filter.
10769    #[test]
10770    fn tenants_filters_to_principal_claim_when_oidc() {
10771        let runtime = rt();
10772        let (fake_server, discovery_url, secret, kid) =
10773            runtime.block_on(async { spin_fake_idp().await });
10774        let server_uri = fake_server.uri();
10775        let _server_guard = fake_server;
10776
10777        let auth = crate::auth::AuthConfig::Oidc {
10778            discovery_url,
10779            audience: "tlist-audience".to_string(),
10780            tenant_claim_name: "solo_tenant".to_string(),
10781        };
10782        let h = Harness::new_with_auth_config(&runtime, Some(auth));
10783        let r = h.router.clone();
10784
10785        runtime.block_on(async {
10786            seed_three_tenants(&h.registry).await;
10787            let token = mint_idp_token(&server_uri, kid, &secret, "alice", "tlist-audience");
10788            let (status, body) = call_with_auth(
10789                r,
10790                "GET",
10791                "/v1/tenants",
10792                None,
10793                Some(&format!("Bearer {token}")),
10794            )
10795            .await;
10796            assert_eq!(status, StatusCode::OK, "got body: {body}");
10797            let arr = body["tenants"].as_array().expect("tenants array");
10798            assert_eq!(arr.len(), 1, "OIDC alice must see exactly one tenant");
10799            assert_eq!(arr[0]["id"].as_str(), Some("alice"));
10800        });
10801        h.shutdown(&runtime);
10802    }
10803
10804    /// 4. Under OIDC with a `tenant_claim` that doesn't match any
10805    ///    registered tenant, the response is `200 OK` with
10806    ///    `tenants: []` — NOT 404. Don't leak whether other tenants
10807    ///    exist via a status-code side-channel for an OIDC principal
10808    ///    that lacks visibility to them.
10809    #[test]
10810    fn tenants_returns_empty_when_oidc_claim_unmatched() {
10811        let runtime = rt();
10812        let (fake_server, discovery_url, secret, kid) =
10813            runtime.block_on(async { spin_fake_idp().await });
10814        let server_uri = fake_server.uri();
10815        let _server_guard = fake_server;
10816
10817        let auth = crate::auth::AuthConfig::Oidc {
10818            discovery_url,
10819            audience: "tlist-audience".to_string(),
10820            tenant_claim_name: "solo_tenant".to_string(),
10821        };
10822        let h = Harness::new_with_auth_config(&runtime, Some(auth));
10823        let r = h.router.clone();
10824
10825        runtime.block_on(async {
10826            seed_three_tenants(&h.registry).await;
10827            // Mint a token claiming a tenant that IS a valid TenantId
10828            // (passes middleware) but doesn't exist in the index.
10829            let token = mint_idp_token(&server_uri, kid, &secret, "nonexistent", "tlist-audience");
10830            let (status, body) = call_with_auth(
10831                r,
10832                "GET",
10833                "/v1/tenants",
10834                None,
10835                Some(&format!("Bearer {token}")),
10836            )
10837            .await;
10838            assert_eq!(
10839                status,
10840                StatusCode::OK,
10841                "must be 200 OK, not 404 — don't leak tenant existence: {body}"
10842            );
10843            let arr = body["tenants"].as_array().expect("tenants array");
10844            assert_eq!(
10845                arr.len(),
10846                0,
10847                "unmatched OIDC claim must produce empty list, got: {body}"
10848            );
10849        });
10850        h.shutdown(&runtime);
10851    }
10852
10853    /// 5. JSON response shape matches what solo-web's TypeScript
10854    ///    client expects: `tenants[*].{id,display_name,created_at_ms,
10855    ///    status,quota_bytes,episode_count,size_bytes,pct_used,
10856    ///    last_accessed_ms}`. Catches accidental field renames at PR
10857    ///    time.
10858    ///
10859    ///    v0.10.1: `episode_count` / `size_bytes` / `pct_used` are
10860    ///    hydrated when the per-tenant DB file exists. This test
10861    ///    registers a tenant whose DB file does NOT exist (the
10862    ///    `for_tests_with_single_tenant` harness only writes the
10863    ///    `default` tenant's DB), so the three numeric fields land as
10864    ///    JSON `null` — verifying the `null` JSON value (not absence)
10865    ///    so clients see a stable shape regardless of hydration
10866    ///    success.
10867    #[test]
10868    fn tenants_response_shape_matches_solo_web_types() {
10869        let runtime = rt();
10870        let h = Harness::new(&runtime);
10871        let r = h.router.clone();
10872        runtime.block_on(async {
10873            // Register one tenant with a display_name + quota so all
10874            // optional fields are present in the response.
10875            let tid = solo_core::TenantId::new("shaped").unwrap();
10876            h.registry
10877                .with_index(|idx| {
10878                    idx.register_with_quota(
10879                        &tid,
10880                        "shaped.db",
10881                        Some("Shaped tenant"),
10882                        Some(1_048_576),
10883                    )
10884                    .unwrap();
10885                })
10886                .await;
10887            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
10888            assert_eq!(status, StatusCode::OK);
10889            let item = &body["tenants"][0];
10890            // id, display_name, created_at_ms, status: required
10891            assert_eq!(item["id"].as_str(), Some("shaped"));
10892            assert_eq!(item["display_name"].as_str(), Some("Shaped tenant"));
10893            assert!(
10894                item["created_at_ms"].is_i64(),
10895                "created_at_ms must be an i64, got {item}"
10896            );
10897            assert_eq!(item["status"].as_str(), Some("active"));
10898            // quota_bytes: present + numeric
10899            assert_eq!(item["quota_bytes"].as_u64(), Some(1_048_576));
10900            // v0.10.1: episode_count / size_bytes / pct_used become
10901            // null when the per-tenant DB file is missing on disk
10902            // (this harness only writes the default tenant's file —
10903            // shaped.db does not exist). Clients must tolerate the
10904            // null JSON shape; absence would be a breaking change.
10905            assert!(
10906                item["episode_count"].is_null(),
10907                "episode_count must be JSON null when tenant DB is missing, got {item}"
10908            );
10909            assert!(
10910                item["size_bytes"].is_null(),
10911                "size_bytes must be JSON null when tenant DB is missing, got {item}"
10912            );
10913            assert!(
10914                item["pct_used"].is_null(),
10915                "pct_used must be JSON null when size_bytes is null, got {item}"
10916            );
10917        });
10918        h.shutdown(&runtime);
10919    }
10920
10921    /// 6. Bearer auth enabled + missing Authorization header → 401
10922    ///    before the handler runs. Confirms the route is plumbed
10923    ///    through `auth_middleware` (it sits inside the `authed`
10924    ///    sub-router, not the `public` one).
10925    /// `last_accessed_ms` is observational but user-facing: the
10926    /// registry stamps it when a tenant is resolved, and `/v1/tenants`
10927    /// must surface that value for solo-web's tenant/status UI. Listing
10928    /// alone must not fake a touch; a real tenant-scoped request should.
10929    #[test]
10930    fn tenants_response_surfaces_last_accessed_after_tenant_request() {
10931        let runtime = rt();
10932        let h = Harness::new(&runtime);
10933        let r = h.router.clone();
10934        runtime.block_on(async {
10935            let tid = solo_core::TenantId::default_tenant();
10936            h.registry
10937                .with_index(|idx| {
10938                    idx.register(&tid, "default.db", Some("Default tenant"))
10939                        .unwrap();
10940                })
10941                .await;
10942
10943            let (status, before_body) = call(r.clone(), "GET", "/v1/tenants", None).await;
10944            assert_eq!(status, StatusCode::OK);
10945            let before_item = &before_body["tenants"][0];
10946            assert_eq!(before_item["id"].as_str(), Some("default"));
10947            assert!(
10948                before_item["last_accessed_ms"].is_null(),
10949                "freshly registered tenant should start untouched: {before_item}"
10950            );
10951
10952            let before_touch_ms = chrono::Utc::now().timestamp_millis();
10953            let (graph_status, graph_body) = call(
10954                r.clone(),
10955                "GET",
10956                "/v1/graph/nodes?kind=episode&limit=1",
10957                None,
10958            )
10959            .await;
10960            assert_eq!(graph_status, StatusCode::OK, "graph body: {graph_body}");
10961
10962            let (status, after_body) = call(r, "GET", "/v1/tenants", None).await;
10963            assert_eq!(status, StatusCode::OK);
10964            let after_item = &after_body["tenants"][0];
10965            let last_accessed = after_item["last_accessed_ms"]
10966                .as_i64()
10967                .unwrap_or_else(|| panic!("last_accessed_ms must be stamped: {after_item}"));
10968            assert!(
10969                last_accessed >= before_touch_ms,
10970                "last_accessed_ms should reflect the graph request touch: {after_item}"
10971            );
10972        });
10973        h.shutdown(&runtime);
10974    }
10975
10976    #[test]
10977    fn tenants_respects_auth_when_enabled() {
10978        let runtime = rt();
10979        let h = Harness::new_with_auth(&runtime, Some("must-auth".into()));
10980        let r = h.router.clone();
10981        runtime.block_on(async {
10982            seed_three_tenants(&h.registry).await;
10983            // No Authorization header → 401.
10984            let (status, _body) = call(r, "GET", "/v1/tenants", None).await;
10985            assert_eq!(status, StatusCode::UNAUTHORIZED);
10986        });
10987        h.shutdown(&runtime);
10988    }
10989
10990    /// 7. `PendingMigration` and `PendingDelete` rows are excluded
10991    ///    from the response. solo-web's tenant picker should never
10992    ///    surface a row that's mid-admin-operation (race with admin
10993    ///    tooling). Only Active tenants make the list.
10994    #[test]
10995    fn tenants_status_filter_excludes_non_active() {
10996        let runtime = rt();
10997        let h = Harness::new(&runtime);
10998        let r = h.router.clone();
10999        runtime.block_on(async {
11000            // Three tenants, three statuses. Only `keeper` (Active)
11001            // should appear on the wire.
11002            let keeper = solo_core::TenantId::new("keeper").unwrap();
11003            let migrating = solo_core::TenantId::new("migrating").unwrap();
11004            let deleting = solo_core::TenantId::new("deleting").unwrap();
11005            h.registry
11006                .with_index(|idx| {
11007                    idx.register(&keeper, "keeper.db", None).unwrap();
11008                    idx.register_with_status(
11009                        &migrating,
11010                        "migrating.db",
11011                        None,
11012                        solo_storage::TenantStatus::PendingMigration,
11013                    )
11014                    .unwrap();
11015                    idx.register_with_status(
11016                        &deleting,
11017                        "deleting.db",
11018                        None,
11019                        solo_storage::TenantStatus::PendingDelete,
11020                    )
11021                    .unwrap();
11022                })
11023                .await;
11024            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11025            assert_eq!(status, StatusCode::OK);
11026            let arr = body["tenants"].as_array().expect("tenants array");
11027            let ids: Vec<&str> = arr.iter().filter_map(|t| t["id"].as_str()).collect();
11028            assert_eq!(
11029                ids,
11030                vec!["keeper"],
11031                "only Active tenants visible; got: {body}"
11032            );
11033        });
11034        h.shutdown(&runtime);
11035    }
11036
11037    /// 8. Empty registry → `200 OK` with `tenants: []`. Defends
11038    ///    against accidental `None` serialisation or 404'ing on an
11039    ///    empty list. solo-web's first paint on a brand-new daemon
11040    ///    needs an empty array to render the "no tenants yet" state.
11041    #[test]
11042    fn tenants_returns_empty_array_when_no_tenants_registered() {
11043        let runtime = rt();
11044        let h = Harness::new(&runtime);
11045        let r = h.router.clone();
11046        runtime.block_on(async {
11047            // Don't seed anything — the harness's in-memory index
11048            // starts at zero rows (the cached default-tenant handle in
11049            // the HashMap is invisible to `list_active`).
11050            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11051            assert_eq!(status, StatusCode::OK);
11052            let arr = body["tenants"].as_array().expect("tenants array");
11053            assert_eq!(arr.len(), 0, "expected empty array, got: {body}");
11054        });
11055        h.shutdown(&runtime);
11056    }
11057
11058    // ---- v0.10.1: cost-number hydration tests ----
11059    //
11060    // These exercise `TenantRegistry::hydrate_tenant_cost_numbers` end-
11061    // to-end through the `/v1/tenants` handler. The harness's
11062    // `for_tests_with_single_tenant` registry uses a plain-SQLite tenant
11063    // DB (not real SQLCipher); the hydration helper has a fallback
11064    // open path for that case (see registry.rs). The
11065    // `_tmp_dir/tenants/<filename>` layout matters: that's where the
11066    // hydration helper looks. These tests create real files there to
11067    // exercise the size_bytes path; episode_count requires the file to
11068    // be a SQLite DB with the `episodes` table.
11069    //
11070    // The `default` tenant exists at `_tmp_dir/test.db` (set by the
11071    // harness); the hydration helper expects `_tmp_dir/tenants/<file>`.
11072    // So we either (a) register a fresh tenant id pointing at a DB we
11073    // create at the expected layout, or (b) check the documented
11074    // behavior under "file missing" (returns null counts gracefully).
11075    // Both shapes are tested here.
11076    //
11077    // The constant `TENANTS_COUNT_HYDRATION_CAP` is grep-able.
11078
11079    /// Helper: create a per-tenant DB file at the layout the hydration
11080    /// helper expects (`<data_dir>/tenants/<db_filename>`), populated
11081    /// with the `episodes` table + `n_active` active episodes +
11082    /// `n_forgotten` forgotten episodes. Returns the absolute path.
11083    fn seed_per_tenant_db_with_episodes(
11084        data_dir: &std::path::Path,
11085        db_filename: &str,
11086        n_active: i64,
11087        n_forgotten: i64,
11088    ) -> std::path::PathBuf {
11089        let tenants_dir = data_dir.join(solo_storage::TENANTS_SUBDIR);
11090        std::fs::create_dir_all(&tenants_dir).unwrap();
11091        let db_path = tenants_dir.join(db_filename);
11092        // Open as plain SQLite (test path; matches the harness's
11093        // `open_test_db_at` shape; hydration helper falls back to plain
11094        // open when SQLCipher open fails).
11095        let mut conn = rusqlite::Connection::open(&db_path).unwrap();
11096        // Run the same migrations the real per-tenant DB does so the
11097        // `episodes` table + `status` CHECK constraint match production.
11098        solo_storage::run_migrations(&mut conn).unwrap();
11099        for i in 0..n_active {
11100            conn.execute(
11101                "INSERT INTO episodes (memory_id, ts_ms, source_type, content, confidence, strength, salience, tier, status, created_at_ms, updated_at_ms)
11102                 VALUES (?, 0, 'user_message', 'x', 0.5, 0.5, 0.5, 'hot', 'active', 0, 0)",
11103                rusqlite::params![format!("a-{i}")],
11104            )
11105            .unwrap();
11106        }
11107        for i in 0..n_forgotten {
11108            conn.execute(
11109                "INSERT INTO episodes (memory_id, ts_ms, source_type, content, confidence, strength, salience, tier, status, created_at_ms, updated_at_ms)
11110                 VALUES (?, 0, 'user_message', 'x', 0.5, 0.5, 0.5, 'hot', 'forgotten', 0, 0)",
11111                rusqlite::params![format!("f-{i}")],
11112            )
11113            .unwrap();
11114        }
11115        drop(conn);
11116        db_path
11117    }
11118
11119    /// v0.10.1 test 1: `episode_count` hydrates to the actual active
11120    /// episode count when the per-tenant DB exists. Seed 3 active + 2
11121    /// forgotten episodes; expect `episode_count: 3` (the `status =
11122    /// 'active'` filter excludes the forgotten rows).
11123    #[test]
11124    fn tenants_response_hydrates_episode_count_when_tenant_has_data() {
11125        let runtime = rt();
11126        let h = Harness::new(&runtime);
11127        let r = h.router.clone();
11128        let data_dir = h._tmp.path().to_path_buf();
11129        runtime.block_on(async {
11130            let tid = solo_core::TenantId::new("counted").unwrap();
11131            seed_per_tenant_db_with_episodes(&data_dir, "counted.db", 3, 2);
11132            h.registry
11133                .with_index(|idx| {
11134                    idx.register(&tid, "counted.db", Some("Counted tenant"))
11135                        .unwrap();
11136                })
11137                .await;
11138            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11139            assert_eq!(status, StatusCode::OK);
11140            let item = &body["tenants"][0];
11141            assert_eq!(item["id"].as_str(), Some("counted"));
11142            assert_eq!(
11143                item["episode_count"].as_i64(),
11144                Some(3),
11145                "episode_count must be 3 (active rows only, 2 forgotten excluded); got {item}"
11146            );
11147        });
11148        h.shutdown(&runtime);
11149    }
11150
11151    /// v0.10.1 test 2: `size_bytes` reports the on-disk size of the
11152    /// per-tenant DB file. Asserts the response value matches
11153    /// `std::fs::metadata(<db_path>).len()` exactly — pins that we
11154    /// read the right file, not e.g. data_dir or a temp.
11155    #[test]
11156    fn tenants_response_hydrates_size_bytes_from_db_file() {
11157        let runtime = rt();
11158        let h = Harness::new(&runtime);
11159        let r = h.router.clone();
11160        let data_dir = h._tmp.path().to_path_buf();
11161        runtime.block_on(async {
11162            let tid = solo_core::TenantId::new("sized").unwrap();
11163            let db_path = seed_per_tenant_db_with_episodes(&data_dir, "sized.db", 1, 0);
11164            h.registry
11165                .with_index(|idx| {
11166                    idx.register(&tid, "sized.db", None).unwrap();
11167                })
11168                .await;
11169            let on_disk = std::fs::metadata(&db_path).unwrap().len();
11170            assert!(on_disk > 0, "test setup: db file should be non-empty");
11171            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11172            assert_eq!(status, StatusCode::OK);
11173            let item = &body["tenants"][0];
11174            assert_eq!(item["id"].as_str(), Some("sized"));
11175            assert_eq!(
11176                item["size_bytes"].as_u64(),
11177                Some(on_disk),
11178                "size_bytes must match fs::metadata; got {item}"
11179            );
11180        });
11181        h.shutdown(&runtime);
11182    }
11183
11184    /// v0.10.1 test 3: `pct_used` is computed from `size_bytes /
11185    /// quota_bytes * 100` when both are known. Pick a quota much
11186    /// larger than the DB so the percentage stays in a sane range
11187    /// (and survives any unrelated DB-page padding).
11188    #[test]
11189    fn tenants_response_computes_pct_used_when_quota_set() {
11190        let runtime = rt();
11191        let h = Harness::new(&runtime);
11192        let r = h.router.clone();
11193        let data_dir = h._tmp.path().to_path_buf();
11194        runtime.block_on(async {
11195            let tid = solo_core::TenantId::new("quoted").unwrap();
11196            let db_path = seed_per_tenant_db_with_episodes(&data_dir, "quoted.db", 1, 0);
11197            // Pick a quota that's large enough that pct_used lands
11198            // between 0 and 50% regardless of SQLite page boundary
11199            // rounding. Asserting an exact float would be flaky.
11200            let on_disk = std::fs::metadata(&db_path).unwrap().len();
11201            let quota = on_disk * 4; // pct_used should be ~25%
11202            h.registry
11203                .with_index(|idx| {
11204                    idx.register_with_quota(&tid, "quoted.db", None, Some(quota))
11205                        .unwrap();
11206                })
11207                .await;
11208            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11209            assert_eq!(status, StatusCode::OK);
11210            let item = &body["tenants"][0];
11211            let pct = item["pct_used"]
11212                .as_f64()
11213                .expect("pct_used must be a number");
11214            assert!(
11215                (0.0..=100.0).contains(&pct),
11216                "pct_used must be in [0, 100], got {pct}"
11217            );
11218            // Allow a wide band — exact value depends on SQLite page
11219            // size — but the recipe (size/quota*100) means a
11220            // size=quota/4 setup must land near 25%.
11221            assert!(
11222                (20.0..=30.0).contains(&pct),
11223                "pct_used must be ~25% for size=quota/4, got {pct}"
11224            );
11225        });
11226        h.shutdown(&runtime);
11227    }
11228
11229    /// v0.10.1 test 4: `pct_used` is `null` when `quota_bytes` is
11230    /// null (the "unlimited" case). Pins that we don't accidentally
11231    /// emit a numeric `0.0` or `100.0` for unlimited quotas.
11232    #[test]
11233    fn tenants_response_pct_used_null_when_quota_null() {
11234        let runtime = rt();
11235        let h = Harness::new(&runtime);
11236        let r = h.router.clone();
11237        let data_dir = h._tmp.path().to_path_buf();
11238        runtime.block_on(async {
11239            let tid = solo_core::TenantId::new("unlimited").unwrap();
11240            seed_per_tenant_db_with_episodes(&data_dir, "unlimited.db", 1, 0);
11241            h.registry
11242                .with_index(|idx| {
11243                    idx.register(&tid, "unlimited.db", None).unwrap();
11244                })
11245                .await;
11246            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
11247            assert_eq!(status, StatusCode::OK);
11248            let item = &body["tenants"][0];
11249            assert_eq!(item["id"].as_str(), Some("unlimited"));
11250            assert!(
11251                item["quota_bytes"].is_null(),
11252                "test setup: quota_bytes must be null, got {item}"
11253            );
11254            assert!(
11255                item["pct_used"].is_null(),
11256                "pct_used must be JSON null when quota_bytes is null, got {item}"
11257            );
11258            // size_bytes still present (no quota doesn't suppress
11259            // size — only pct_used).
11260            assert!(
11261                item["size_bytes"].is_u64(),
11262                "size_bytes must still be present when quota_bytes is null, got {item}"
11263            );
11264        });
11265        h.shutdown(&runtime);
11266    }
11267
11268    /// v0.10.1 test 5: the response includes
11269    /// `X-Solo-Tenants-Count-Cap-Reached: true` when the filtered
11270    /// tenant count exceeds `TENANTS_COUNT_HYDRATION_CAP`. Tenants
11271    /// beyond the cap have `episode_count: null` even though their
11272    /// `size_bytes` is still hydrated (fs::metadata is cheap).
11273    ///
11274    /// We don't seed 51 real DBs (would be slow); instead, we
11275    /// register 51 tenant rows in the index. The cap is documented
11276    /// to apply to `episode_count` hydration, and the header is
11277    /// emitted purely from the count of filtered records. The
11278    /// header semantics here are independent of per-tenant DB
11279    /// existence.
11280    #[test]
11281    fn tenants_response_sets_cap_reached_header_when_over_cap() {
11282        let runtime = rt();
11283        let h = Harness::new(&runtime);
11284        let r = h.router.clone();
11285        runtime.block_on(async {
11286            // Register 51 tenants (cap = 50, so we exceed it).
11287            h.registry
11288                .with_index(|idx| {
11289                    for i in 0..51 {
11290                        let id = format!("t{i:02}");
11291                        let tid = solo_core::TenantId::new(&id).unwrap();
11292                        idx.register(&tid, &format!("{id}.db"), None).unwrap();
11293                    }
11294                })
11295                .await;
11296            // Send a raw request so we can inspect headers.
11297            use axum::body::Body;
11298            use axum::http::Request;
11299            use http_body_util::BodyExt;
11300            let req = Request::builder()
11301                .method("GET")
11302                .uri("/v1/tenants")
11303                .body(Body::empty())
11304                .unwrap();
11305            let resp = r.oneshot(req).await.unwrap();
11306            assert_eq!(resp.status(), StatusCode::OK);
11307            let cap_header = resp
11308                .headers()
11309                .get(X_SOLO_TENANTS_COUNT_CAP_HEADER)
11310                .expect("cap-reached header must be present");
11311            assert_eq!(
11312                cap_header.to_str().unwrap(),
11313                "true",
11314                "cap-reached header value must be 'true' when over cap"
11315            );
11316            // Parse body to verify shape — beyond-cap tenants have
11317            // null episode_count.
11318            let bytes = resp.into_body().collect().await.unwrap().to_bytes();
11319            let body: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
11320            let arr = body["tenants"].as_array().expect("tenants array");
11321            assert_eq!(arr.len(), 51, "got {} tenants", arr.len());
11322            // The last (sorted-by-created_at_ms) tenant should be
11323            // beyond the cap. The hydration order matches the
11324            // filtered list order, so index 50 is the 51st tenant
11325            // and should have null episode_count.
11326            assert!(
11327                arr[50]["episode_count"].is_null(),
11328                "the 51st tenant (beyond cap) must have null episode_count, got {}",
11329                arr[50]
11330            );
11331        });
11332        h.shutdown(&runtime);
11333    }
11334
11335    /// v0.10.1 test 6: when the response is under the cap, the
11336    /// `X-Solo-Tenants-Count-Cap-Reached` header is absent. Pin the
11337    /// negative case so a future refactor that always emits the
11338    /// header (with "false") doesn't pass silently.
11339    #[test]
11340    fn tenants_response_omits_cap_header_when_under_cap() {
11341        let runtime = rt();
11342        let h = Harness::new(&runtime);
11343        let r = h.router.clone();
11344        runtime.block_on(async {
11345            seed_three_tenants(&h.registry).await;
11346            use axum::body::Body;
11347            use axum::http::Request;
11348            let req = Request::builder()
11349                .method("GET")
11350                .uri("/v1/tenants")
11351                .body(Body::empty())
11352                .unwrap();
11353            let resp = r.oneshot(req).await.unwrap();
11354            assert_eq!(resp.status(), StatusCode::OK);
11355            assert!(
11356                resp.headers()
11357                    .get(X_SOLO_TENANTS_COUNT_CAP_HEADER)
11358                    .is_none(),
11359                "cap-reached header must be absent under the cap"
11360            );
11361        });
11362        h.shutdown(&runtime);
11363    }
11364
11365    // ---- Pure unit tests on the visibility filter ----
11366    //
11367    // These exercise `filter_tenants_for_principal` and
11368    // `is_single_principal_bearer` without an axum router — fast
11369    // feedback for the load-bearing visibility rule. The
11370    // router-level tests above cover the wire path.
11371
11372    /// Build a synthetic `TenantRecord` so the pure unit tests don't
11373    /// need a real SQLCipher round-trip.
11374    fn make_record(id: &str) -> solo_storage::TenantRecord {
11375        solo_storage::TenantRecord {
11376            tenant_id: solo_core::TenantId::new(id).unwrap(),
11377            db_filename: format!("{id}.db"),
11378            display_name: None,
11379            created_at_ms: 0,
11380            status: solo_storage::TenantStatus::Active,
11381            quota_bytes: None,
11382            last_accessed_ms: None,
11383        }
11384    }
11385
11386    #[test]
11387    fn filter_no_principal_returns_all() {
11388        let records = vec![make_record("a"), make_record("b")];
11389        let out = filter_tenants_for_principal(records.clone(), None);
11390        assert_eq!(out.len(), 2);
11391        assert_eq!(out[0].tenant_id.as_str(), "a");
11392        assert_eq!(out[1].tenant_id.as_str(), "b");
11393    }
11394
11395    #[test]
11396    fn filter_bearer_principal_returns_all() {
11397        let records = vec![make_record("a"), make_record("b")];
11398        let p = AuthenticatedPrincipal::bearer(solo_core::TenantId::new("a").unwrap());
11399        let out = filter_tenants_for_principal(records, Some(&p));
11400        assert_eq!(out.len(), 2);
11401    }
11402
11403    #[test]
11404    fn filter_oidc_principal_keeps_only_claim() {
11405        let records = vec![make_record("a"), make_record("b"), make_record("c")];
11406        // OIDC-flavoured principal: non-bearer subject + JSON-object claims.
11407        let p = AuthenticatedPrincipal {
11408            subject: "alice@example.com".to_string(),
11409            tenant_claim: Some(solo_core::TenantId::new("b").unwrap()),
11410            scopes: vec!["read".to_string()],
11411            claims: serde_json::json!({ "sub": "alice@example.com" }),
11412        };
11413        let out = filter_tenants_for_principal(records, Some(&p));
11414        assert_eq!(out.len(), 1);
11415        assert_eq!(out[0].tenant_id.as_str(), "b");
11416    }
11417
11418    #[test]
11419    fn filter_oidc_principal_with_no_claim_returns_empty() {
11420        // Theoretically unreachable — middleware short-circuits at 403
11421        // before we see a no-claim OIDC principal. Defend anyway.
11422        let records = vec![make_record("a")];
11423        let p = AuthenticatedPrincipal {
11424            subject: "alice@example.com".to_string(),
11425            tenant_claim: None,
11426            scopes: vec![],
11427            claims: serde_json::json!({ "sub": "alice@example.com" }),
11428        };
11429        let out = filter_tenants_for_principal(records, Some(&p));
11430        assert!(out.is_empty());
11431    }
11432
11433    #[test]
11434    fn is_single_principal_bearer_discriminator() {
11435        let bearer = AuthenticatedPrincipal::bearer(solo_core::TenantId::new("default").unwrap());
11436        assert!(is_single_principal_bearer(&bearer));
11437
11438        let oidc = AuthenticatedPrincipal {
11439            subject: "alice".to_string(),
11440            tenant_claim: Some(solo_core::TenantId::new("alice").unwrap()),
11441            scopes: vec![],
11442            claims: serde_json::json!({ "x": 1 }),
11443        };
11444        assert!(!is_single_principal_bearer(&oidc));
11445
11446        // Subject == "bearer" but claims is a non-null object → not a
11447        // bearer-shaped principal. Defends against a forged-bearer
11448        // shape that might smuggle JWT claims.
11449        let weird = AuthenticatedPrincipal {
11450            subject: "bearer".to_string(),
11451            tenant_claim: Some(solo_core::TenantId::default_tenant()),
11452            scopes: vec![],
11453            claims: serde_json::json!({ "leak": 1 }),
11454        };
11455        assert!(!is_single_principal_bearer(&weird));
11456    }
11457
11458    // ---------------------------------------------------------------
11459    // v0.10.2 — MCP-over-HTTP transport on /mcp
11460    // ---------------------------------------------------------------
11461    //
11462    // These tests pin the wire contract for the new `/mcp` route added
11463    // in v0.10.2 P2. We exercise the route through the same `Harness`
11464    // pattern the rest of the file uses (in-process axum Router via
11465    // `tower::ServiceExt::oneshot`) — no real TCP listener needed.
11466    //
11467    // The dispatcher's unit tests live in `mcp_dispatch::tests` and
11468    // cover the JSON-RPC envelope shape in isolation. These tests are
11469    // the integration layer: real `TenantHandle`, real `WriterActor`,
11470    // real `SoloMcpServer::dispatch_tool` path.
11471
11472    /// `POST /mcp` with `{jsonrpc, id, method: "tools/list"}` returns
11473    /// the canonical 18 tools. Matches the stdio smoke test
11474    /// `mcp_stdio_lists_eighteen_canonical_tools` from
11475    /// `crates/solo-cli/tests/mcp_smoke.rs` so any drift between the
11476    /// two transports fails one of the two suites loudly.
11477    #[test]
11478    fn mcp_http_tools_list_returns_eighteen_canonical_tools() {
11479        let runtime = rt();
11480        let h = Harness::new(&runtime);
11481        let r = h.router.clone();
11482        runtime.block_on(async move {
11483            let req = json!({
11484                "jsonrpc": "2.0",
11485                "id": 1,
11486                "method": "tools/list",
11487            });
11488            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11489            assert_eq!(status, StatusCode::OK);
11490            assert_eq!(body.get("jsonrpc").and_then(|v| v.as_str()), Some("2.0"));
11491            assert_eq!(body.get("id").and_then(|v| v.as_i64()), Some(1));
11492            let tools = body
11493                .pointer("/result/tools")
11494                .and_then(|v| v.as_array())
11495                .unwrap_or_else(|| panic!("missing /result/tools: {body}"));
11496            let mut names: Vec<String> = tools
11497                .iter()
11498                .filter_map(|t| t.get("name").and_then(|n| n.as_str()).map(String::from))
11499                .collect();
11500            names.sort();
11501            assert_eq!(
11502                names,
11503                vec![
11504                    "memory_context".to_string(),
11505                    "memory_contradiction_resolve".to_string(),
11506                    "memory_contradictions".to_string(),
11507                    "memory_entities".to_string(),
11508                    "memory_facts_about".to_string(),
11509                    "memory_forget".to_string(),
11510                    "memory_forget_document".to_string(),
11511                    "memory_ingest_document".to_string(),
11512                    "memory_inspect".to_string(),
11513                    "memory_inspect_cluster".to_string(),
11514                    "memory_inspect_document".to_string(),
11515                    "memory_list_documents".to_string(),
11516                    "memory_recall".to_string(),
11517                    "memory_remember".to_string(),
11518                    "memory_remember_batch".to_string(),
11519                    "memory_search_docs".to_string(),
11520                    "memory_themes".to_string(),
11521                    "memory_update".to_string(),
11522                ],
11523                "mcp_http: tools/list returned unexpected name set"
11524            );
11525        });
11526        h.shutdown(&runtime);
11527    }
11528
11529    /// `POST /mcp` with `tools/call` for `memory_remember` writes the
11530    /// episode and returns a confirmation string. Then a separate
11531    /// `GET /v1/graph/nodes` call (REST surface) sees the episode —
11532    /// proving one process is serving both surfaces against the same
11533    /// writer.
11534    #[test]
11535    fn mcp_http_remember_writes_episode_visible_via_graph_nodes() {
11536        let runtime = rt();
11537        let h = Harness::new(&runtime);
11538        let r = h.router.clone();
11539        runtime.block_on(async move {
11540            // 1. memory_remember via /mcp.
11541            let req = json!({
11542                "jsonrpc": "2.0",
11543                "id": 2,
11544                "method": "tools/call",
11545                "params": {
11546                    "name": "memory_remember",
11547                    "arguments": { "content": "mcp-http-cross-surface-smoke" },
11548                },
11549            });
11550            let (status, body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
11551            assert_eq!(status, StatusCode::OK);
11552            let result_text = body
11553                .pointer("/result/content/0/text")
11554                .and_then(|v| v.as_str())
11555                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
11556            assert!(
11557                result_text.starts_with("remembered "),
11558                "expected `remembered <id>`, got: {result_text}"
11559            );
11560
11561            // 2. Confirm via /v1/graph/nodes (REST). Same writer, same
11562            //    tenant — the cross-surface smoke that motivates v0.10.2.
11563            //    Episode nodes carry the content under `label` +
11564            //    `preview` (the v0.10.0 graph-nodes wire shape).
11565            let (status2, nodes_body) =
11566                call(r, "GET", "/v1/graph/nodes?kind=episode&limit=10", None).await;
11567            assert_eq!(status2, StatusCode::OK);
11568            let nodes = nodes_body
11569                .get("nodes")
11570                .and_then(|v| v.as_array())
11571                .unwrap_or_else(|| panic!("missing nodes: {nodes_body}"));
11572            assert!(
11573                nodes.iter().any(|n| {
11574                    let label_hit = n
11575                        .get("label")
11576                        .and_then(|c| c.as_str())
11577                        .is_some_and(|s| s.contains("mcp-http-cross-surface-smoke"));
11578                    let preview_hit = n
11579                        .get("preview")
11580                        .and_then(|c| c.as_str())
11581                        .is_some_and(|s| s.contains("mcp-http-cross-surface-smoke"));
11582                    label_hit || preview_hit
11583                }),
11584                "graph/nodes didn't surface the MCP-written episode: {nodes_body}"
11585            );
11586        });
11587        h.shutdown(&runtime);
11588    }
11589
11590    /// `memory_remember_batch` must accept the canonical `{ items: [...] }`
11591    /// argument envelope and land all rows in the same graph REST surface.
11592    /// This is the batch variant of the cross-surface smoke above and
11593    /// protects external clients from drifting to a renamed field.
11594    #[test]
11595    fn mcp_http_remember_batch_items_visible_via_graph_nodes() {
11596        let runtime = rt();
11597        let h = Harness::new(&runtime);
11598        let r = h.router.clone();
11599        runtime.block_on(async move {
11600            let marker_a = "mcp-http-batch-cross-surface-smoke-a";
11601            let marker_b = "mcp-http-batch-cross-surface-smoke-b";
11602            let req = json!({
11603                "jsonrpc": "2.0",
11604                "id": 22,
11605                "method": "tools/call",
11606                "params": {
11607                    "name": "memory_remember_batch",
11608                    "arguments": {
11609                        "items": [
11610                            { "content": marker_a, "source_type": "smoke", "salience": 0.7 },
11611                            { "content": marker_b, "source_type": "smoke", "salience": 0.7 }
11612                        ]
11613                    },
11614                },
11615            });
11616            let (status, body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
11617            assert_eq!(status, StatusCode::OK, "batch body: {body}");
11618            let result_text = body
11619                .pointer("/result/content/0/text")
11620                .and_then(|v| v.as_str())
11621                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
11622            let ids: Vec<String> = serde_json::from_str(result_text)
11623                .unwrap_or_else(|e| panic!("batch result should be JSON id array: {e}: {body}"));
11624            assert_eq!(ids.len(), 2, "two items in, two ids out: {result_text}");
11625
11626            let (status2, nodes_body) =
11627                call(r, "GET", "/v1/graph/nodes?kind=episode&limit=10", None).await;
11628            assert_eq!(status2, StatusCode::OK);
11629            let nodes = nodes_body
11630                .get("nodes")
11631                .and_then(|v| v.as_array())
11632                .unwrap_or_else(|| panic!("missing nodes: {nodes_body}"));
11633            for marker in [marker_a, marker_b] {
11634                assert!(
11635                    nodes.iter().any(|n| {
11636                        let label_hit = n
11637                            .get("label")
11638                            .and_then(|c| c.as_str())
11639                            .is_some_and(|s| s.contains(marker));
11640                        let preview_hit = n
11641                            .get("preview")
11642                            .and_then(|c| c.as_str())
11643                            .is_some_and(|s| s.contains(marker));
11644                        label_hit || preview_hit
11645                    }),
11646                    "graph/nodes didn't surface batch marker {marker}: {nodes_body}"
11647                );
11648            }
11649        });
11650        h.shutdown(&runtime);
11651    }
11652
11653    /// `memory_remember_batch`'s HTTP MCP contract is the canonical
11654    /// `{ items: [...] }` envelope. A drift back to `{ entries: [...] }`
11655    /// must fail as JSON-RPC invalid_params, not silently accept or
11656    /// partially write.
11657    #[test]
11658    fn mcp_http_remember_batch_rejects_entries_envelope() {
11659        let runtime = rt();
11660        let h = Harness::new(&runtime);
11661        let r = h.router.clone();
11662        runtime.block_on(async move {
11663            let req = json!({
11664                "jsonrpc": "2.0",
11665                "id": 23,
11666                "method": "tools/call",
11667                "params": {
11668                    "name": "memory_remember_batch",
11669                    "arguments": {
11670                        "entries": [
11671                            { "content": "legacy-entries-envelope" }
11672                        ]
11673                    },
11674                },
11675            });
11676            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11677            assert_eq!(
11678                status,
11679                StatusCode::OK,
11680                "JSON-RPC errors stay in-body: {body}"
11681            );
11682            assert_eq!(
11683                body.pointer("/error/code").and_then(|v| v.as_i64()),
11684                Some(-32602),
11685                "expected JSON-RPC INVALID_PARAMS (-32602), got: {body}"
11686            );
11687            let message = body
11688                .pointer("/error/message")
11689                .and_then(|v| v.as_str())
11690                .unwrap_or("");
11691            assert!(
11692                message.contains("invalid tool arguments") && message.contains("items"),
11693                "error must point clients back to the canonical items envelope: {body}"
11694            );
11695        });
11696        h.shutdown(&runtime);
11697    }
11698
11699    /// `POST /mcp` with `tools/call` for `memory_recall` returns the
11700    /// just-remembered episode. Smoke for the read path under the new
11701    /// transport.
11702    #[test]
11703    fn mcp_http_recall_returns_just_remembered_episode() {
11704        let runtime = rt();
11705        let h = Harness::new(&runtime);
11706        let r = h.router.clone();
11707        runtime.block_on(async move {
11708            // Remember first.
11709            let needle = "mcp-http-recall-needle-deadbeef";
11710            let req = json!({
11711                "jsonrpc": "2.0",
11712                "id": 3,
11713                "method": "tools/call",
11714                "params": {
11715                    "name": "memory_remember",
11716                    "arguments": { "content": needle },
11717                },
11718            });
11719            let (status, _body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
11720            assert_eq!(status, StatusCode::OK);
11721
11722            // Recall via the same /mcp transport.
11723            let req = json!({
11724                "jsonrpc": "2.0",
11725                "id": 4,
11726                "method": "tools/call",
11727                "params": {
11728                    "name": "memory_recall",
11729                    "arguments": { "query": needle, "limit": 5 },
11730                },
11731            });
11732            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11733            assert_eq!(status, StatusCode::OK);
11734            let recall_text = body
11735                .pointer("/result/content/0/text")
11736                .and_then(|v| v.as_str())
11737                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
11738            assert!(
11739                recall_text.contains(needle),
11740                "recall didn't surface needle `{needle}`: {recall_text}"
11741            );
11742        });
11743        h.shutdown(&runtime);
11744    }
11745
11746    /// Malformed JSON body must surface as 400 (the wire envelope is
11747    /// invalid; the JSON-RPC layer never sees the request). The error
11748    /// body shape matches the rest of the API (`{error, status}`) so
11749    /// existing client error-handling paths keep working.
11750    #[test]
11751    fn mcp_http_malformed_body_returns_400() {
11752        let runtime = rt();
11753        let h = Harness::new(&runtime);
11754        let r = h.router.clone();
11755        runtime.block_on(async move {
11756            let req = Request::builder()
11757                .method("POST")
11758                .uri("/mcp")
11759                .header("content-type", "application/json")
11760                .body(Body::from("not-json-at-all".as_bytes()))
11761                .unwrap();
11762            let resp = r.oneshot(req).await.unwrap();
11763            assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
11764            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
11765            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
11766            assert!(
11767                v.get("error")
11768                    .and_then(|e| e.as_str())
11769                    .map(|s| s.contains("invalid JSON-RPC request"))
11770                    .unwrap_or(false),
11771                "got: {v}"
11772            );
11773        });
11774        h.shutdown(&runtime);
11775    }
11776
11777    /// Wrong `jsonrpc` version must surface as 400. JSON-RPC 2.0 §4
11778    /// requires the literal string `"2.0"`.
11779    #[test]
11780    fn mcp_http_wrong_jsonrpc_version_returns_400() {
11781        let runtime = rt();
11782        let h = Harness::new(&runtime);
11783        let r = h.router.clone();
11784        runtime.block_on(async move {
11785            let req = json!({
11786                "jsonrpc": "1.0",
11787                "id": 1,
11788                "method": "tools/list",
11789            });
11790            let (status, _body) = call(r, "POST", "/mcp", Some(req)).await;
11791            assert_eq!(status, StatusCode::BAD_REQUEST);
11792        });
11793        h.shutdown(&runtime);
11794    }
11795
11796    /// Unknown method returns a JSON-RPC error envelope with code
11797    /// -32601 (METHOD_NOT_FOUND). HTTP status stays 200 because the
11798    /// envelope itself parsed fine — JSON-RPC errors are in-body.
11799    #[test]
11800    fn mcp_http_unknown_method_returns_in_body_method_not_found() {
11801        let runtime = rt();
11802        let h = Harness::new(&runtime);
11803        let r = h.router.clone();
11804        runtime.block_on(async move {
11805            let req = json!({
11806                "jsonrpc": "2.0",
11807                "id": 5,
11808                "method": "definitely/not/a/method",
11809            });
11810            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11811            assert_eq!(status, StatusCode::OK);
11812            assert_eq!(
11813                body.pointer("/error/code").and_then(|v| v.as_i64()),
11814                Some(-32601),
11815                "expected JSON-RPC METHOD_NOT_FOUND (-32601), got: {body}"
11816            );
11817        });
11818        h.shutdown(&runtime);
11819    }
11820
11821    /// `POST /mcp` with the bearer-auth middleware enabled returns
11822    /// 401 without the token and 200 with the correct token.
11823    #[test]
11824    fn mcp_http_post_respects_bearer_auth() {
11825        let runtime = rt();
11826        let h = Harness::new_with_auth(&runtime, Some("secret-mcp-token".into()));
11827        let r = h.router.clone();
11828        runtime.block_on(async move {
11829            // No Authorization header → 401.
11830            let req = json!({
11831                "jsonrpc": "2.0",
11832                "id": 6,
11833                "method": "tools/list",
11834            });
11835            let (status, _body) = call(r.clone(), "POST", "/mcp", Some(req.clone())).await;
11836            assert_eq!(status, StatusCode::UNAUTHORIZED);
11837
11838            // With correct bearer → 200 + valid JSON-RPC reply.
11839            let (status, body) = call_with_auth(
11840                r,
11841                "POST",
11842                "/mcp",
11843                Some(req),
11844                Some("Bearer secret-mcp-token"),
11845            )
11846            .await;
11847            assert_eq!(status, StatusCode::OK);
11848            assert_eq!(
11849                body.pointer("/result/tools")
11850                    .and_then(|v| v.as_array())
11851                    .map(|a| a.len()),
11852                Some(18),
11853                "authed tools/list should still return 18 tools: {body}"
11854            );
11855        });
11856        h.shutdown(&runtime);
11857    }
11858
11859    /// `/mcp` goes through the same `TenantExtractor` as REST graph
11860    /// routes. Invalid tenant ids are rejected before JSON-RPC dispatch
11861    /// so clients don't accidentally create a session against a bad
11862    /// tenant key.
11863    #[test]
11864    fn mcp_http_post_rejects_invalid_tenant_header() {
11865        let runtime = rt();
11866        let h = Harness::new(&runtime);
11867        let r = h.router.clone();
11868        runtime.block_on(async move {
11869            let req = json!({
11870                "jsonrpc": "2.0",
11871                "id": 7,
11872                "method": "tools/list",
11873            });
11874            let (status, body) = call_with_tenant(r, "POST", "/mcp", Some(req), "UPPER").await;
11875            assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
11876            let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("");
11877            assert!(
11878                msg.contains("x-solo-tenant") && msg.contains("invalid tenant id"),
11879                "error must identify the invalid tenant header: {body}"
11880            );
11881        });
11882        h.shutdown(&runtime);
11883    }
11884
11885    /// Unknown tenants should be a route-level 404 on `/mcp`, matching
11886    /// REST. This protects solo-jarvis from receiving a JSON-RPC-looking
11887    /// success envelope for a typoed tenant.
11888    #[test]
11889    fn mcp_http_post_rejects_unknown_tenant_header() {
11890        let runtime = rt();
11891        let h = Harness::new(&runtime);
11892        let r = h.router.clone();
11893        runtime.block_on(async move {
11894            let req = json!({
11895                "jsonrpc": "2.0",
11896                "id": 8,
11897                "method": "tools/list",
11898            });
11899            let (status, body) =
11900                call_with_tenant(r, "POST", "/mcp", Some(req), "never-registered").await;
11901            assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
11902            let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("");
11903            assert!(
11904                msg.contains("tenant") && msg.contains("not found"),
11905                "error must identify the missing tenant: {body}"
11906            );
11907        });
11908        h.shutdown(&runtime);
11909    }
11910
11911    /// CORS preflight (`OPTIONS /mcp`) from a localhost origin returns
11912    /// 200 (tower-http's CorsLayer handles preflight implicitly) and
11913    /// the `access-control-allow-headers` carries both
11914    /// `x-solo-tenant` and `mcp-session-id`. Pins the v0.10.2
11915    /// allow-list addition.
11916    #[test]
11917    fn mcp_http_cors_preflight_allows_mcp_session_id_header() {
11918        let runtime = rt();
11919        let h = Harness::new(&runtime);
11920        let r = h.router.clone();
11921        runtime.block_on(async move {
11922            let req = Request::builder()
11923                .method("OPTIONS")
11924                .uri("/mcp")
11925                .header("origin", "http://localhost:5173")
11926                .header("access-control-request-method", "POST")
11927                .header(
11928                    "access-control-request-headers",
11929                    "content-type, mcp-session-id, x-solo-tenant, authorization",
11930                )
11931                .body(Body::empty())
11932                .unwrap();
11933            let resp = r.oneshot(req).await.unwrap();
11934            // tower-http CorsLayer returns 200 for permitted preflight.
11935            assert_eq!(resp.status(), StatusCode::OK);
11936            let allow_headers = resp
11937                .headers()
11938                .get("access-control-allow-headers")
11939                .and_then(|h| h.to_str().ok())
11940                .unwrap_or("")
11941                .to_lowercase();
11942            assert!(
11943                allow_headers.contains("mcp-session-id"),
11944                "preflight allow-headers must include mcp-session-id; got: {allow_headers}"
11945            );
11946            assert!(
11947                allow_headers.contains("x-solo-tenant"),
11948                "preflight allow-headers must still include x-solo-tenant; got: {allow_headers}"
11949            );
11950            // Allow-origin must echo the localhost origin (per the
11951            // permissive-localhost predicate).
11952            let allow_origin = resp
11953                .headers()
11954                .get("access-control-allow-origin")
11955                .and_then(|h| h.to_str().ok())
11956                .unwrap_or("");
11957            assert_eq!(allow_origin, "http://localhost:5173");
11958        });
11959        h.shutdown(&runtime);
11960    }
11961
11962    /// CORS preflight for browser memory corrections must allow PATCH.
11963    /// solo-web sends `PATCH /memory/{id}` with `content-type` and
11964    /// `x-solo-tenant`; if PATCH is absent from the allow-methods list,
11965    /// the browser rejects the request before it reaches the API.
11966    #[test]
11967    fn memory_update_cors_preflight_allows_patch() {
11968        let runtime = rt();
11969        let h = Harness::new(&runtime);
11970        let r = h.router.clone();
11971        runtime.block_on(async move {
11972            let req = Request::builder()
11973                .method("OPTIONS")
11974                .uri("/memory/ep:test")
11975                .header("origin", "http://localhost:5173")
11976                .header("access-control-request-method", "PATCH")
11977                .header(
11978                    "access-control-request-headers",
11979                    "content-type, x-solo-tenant",
11980                )
11981                .body(Body::empty())
11982                .unwrap();
11983            let resp = r.oneshot(req).await.unwrap();
11984            assert_eq!(resp.status(), StatusCode::OK);
11985            let allow_methods = resp
11986                .headers()
11987                .get("access-control-allow-methods")
11988                .and_then(|h| h.to_str().ok())
11989                .unwrap_or("")
11990                .to_lowercase();
11991            assert!(
11992                allow_methods.contains("patch"),
11993                "preflight allow-methods must include patch; got: {allow_methods}"
11994            );
11995            let allow_headers = resp
11996                .headers()
11997                .get("access-control-allow-headers")
11998                .and_then(|h| h.to_str().ok())
11999                .unwrap_or("")
12000                .to_lowercase();
12001            assert!(
12002                allow_headers.contains("x-solo-tenant"),
12003                "preflight allow-headers must include x-solo-tenant; got: {allow_headers}"
12004            );
12005            assert_eq!(
12006                resp.headers()
12007                    .get("access-control-allow-origin")
12008                    .and_then(|h| h.to_str().ok()),
12009                Some("http://localhost:5173")
12010            );
12011        });
12012        h.shutdown(&runtime);
12013    }
12014
12015    /// Notification messages (no `id`) return 202 Accepted with an
12016    /// empty body. Per JSON-RPC 2.0 §4.1 the server MUST NOT reply.
12017    #[test]
12018    fn mcp_http_notification_returns_202_accepted() {
12019        let runtime = rt();
12020        let h = Harness::new(&runtime);
12021        let r = h.router.clone();
12022        runtime.block_on(async move {
12023            let req = json!({
12024                "jsonrpc": "2.0",
12025                "method": "notifications/initialized",
12026                "params": {},
12027            });
12028            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
12029            assert_eq!(status, StatusCode::ACCEPTED);
12030            // Empty body — call() returns Value::Null when the body is
12031            // empty.
12032            assert_eq!(body, Value::Null);
12033        });
12034        h.shutdown(&runtime);
12035    }
12036
12037    // ---------------------------------------------------------------
12038    // v0.11.0 P1 — MCP `Mcp-Session-Id` middleware integration tests
12039    // ---------------------------------------------------------------
12040    //
12041    // These pin the per-request session contract: the POST handler
12042    // creates a fresh session id on a request that arrives without
12043    // the header (echoed back via `Mcp-Session-Id` response header);
12044    // a subsequent request carrying that same id continues using the
12045    // same session record; unknown or stale ids surface as 404 with
12046    // a re-init instruction. The lazy/background expiry semantics are
12047    // unit-tested in `mcp_session::tests`.
12048
12049    /// `POST /mcp` with `tools/list` (no `Mcp-Session-Id` header) must
12050    /// echo back a fresh session id in the response header. The
12051    /// session count in the store grows by exactly 1.
12052    #[test]
12053    fn mcp_post_without_session_id_creates_new_session() {
12054        let runtime = rt();
12055        let h = Harness::new(&runtime);
12056        let r = h.router.clone();
12057        runtime.block_on(async move {
12058            let req = Request::builder()
12059                .method("POST")
12060                .uri("/mcp")
12061                .header("content-type", "application/json")
12062                .body(Body::from(
12063                    serde_json::to_vec(&json!({
12064                        "jsonrpc": "2.0",
12065                        "id": 100,
12066                        "method": "tools/list",
12067                    }))
12068                    .unwrap(),
12069                ))
12070                .unwrap();
12071            let resp = r.oneshot(req).await.unwrap();
12072            assert_eq!(resp.status(), StatusCode::OK);
12073            let session_id = resp
12074                .headers()
12075                .get("mcp-session-id")
12076                .and_then(|v| v.to_str().ok())
12077                .map(|s| s.to_string())
12078                .unwrap_or_else(|| {
12079                    panic!(
12080                        "mcp-session-id response header missing on session-init POST: {:?}",
12081                        resp.headers()
12082                    )
12083                });
12084            assert!(
12085                !session_id.is_empty(),
12086                "session id must be a non-empty string"
12087            );
12088        });
12089        h.shutdown(&runtime);
12090    }
12091
12092    /// Two `POST /mcp` calls with the same session id in the request
12093    /// header must hit the same `SessionState` (i.e. no new entry
12094    /// gets allocated). The second response echoes the same id back.
12095    #[test]
12096    fn mcp_post_with_valid_session_id_continues_session() {
12097        let runtime = rt();
12098        let h = Harness::new(&runtime);
12099        let r = h.router.clone();
12100        runtime.block_on(async move {
12101            // First request: no header → fresh id.
12102            let req = Request::builder()
12103                .method("POST")
12104                .uri("/mcp")
12105                .header("content-type", "application/json")
12106                .body(Body::from(
12107                    serde_json::to_vec(&json!({
12108                        "jsonrpc": "2.0",
12109                        "id": 101,
12110                        "method": "tools/list",
12111                    }))
12112                    .unwrap(),
12113                ))
12114                .unwrap();
12115            let resp1 = r.clone().oneshot(req).await.unwrap();
12116            assert_eq!(resp1.status(), StatusCode::OK);
12117            let assigned_id = resp1
12118                .headers()
12119                .get("mcp-session-id")
12120                .and_then(|v| v.to_str().ok())
12121                .map(|s| s.to_string())
12122                .expect("first response must carry mcp-session-id");
12123
12124            // Second request: carry the same id forward.
12125            let req2 = Request::builder()
12126                .method("POST")
12127                .uri("/mcp")
12128                .header("content-type", "application/json")
12129                .header("mcp-session-id", &assigned_id)
12130                .body(Body::from(
12131                    serde_json::to_vec(&json!({
12132                        "jsonrpc": "2.0",
12133                        "id": 102,
12134                        "method": "tools/list",
12135                    }))
12136                    .unwrap(),
12137                ))
12138                .unwrap();
12139            let resp2 = r.oneshot(req2).await.unwrap();
12140            assert_eq!(resp2.status(), StatusCode::OK);
12141            let echoed = resp2
12142                .headers()
12143                .get("mcp-session-id")
12144                .and_then(|v| v.to_str().ok())
12145                .map(|s| s.to_string())
12146                .expect("continuation response must echo mcp-session-id");
12147            assert_eq!(
12148                echoed, assigned_id,
12149                "second response must echo the same session id"
12150            );
12151        });
12152        h.shutdown(&runtime);
12153    }
12154
12155    /// A `POST /mcp` carrying a random / never-assigned `Mcp-Session-Id`
12156    /// must surface as 404 with the `session_expired` error
12157    /// discriminator and the re-initialize instruction in the body.
12158    #[test]
12159    fn mcp_post_with_unknown_session_id_returns_404() {
12160        let runtime = rt();
12161        let h = Harness::new(&runtime);
12162        let r = h.router.clone();
12163        runtime.block_on(async move {
12164            let req = Request::builder()
12165                .method("POST")
12166                .uri("/mcp")
12167                .header("content-type", "application/json")
12168                // A plausibly-shaped id the server never assigned.
12169                .header("mcp-session-id", "11111111-2222-3333-4444-555555555555")
12170                .body(Body::from(
12171                    serde_json::to_vec(&json!({
12172                        "jsonrpc": "2.0",
12173                        "id": 103,
12174                        "method": "tools/list",
12175                    }))
12176                    .unwrap(),
12177                ))
12178                .unwrap();
12179            let resp = r.oneshot(req).await.unwrap();
12180            assert_eq!(resp.status(), StatusCode::NOT_FOUND);
12181            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
12182            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
12183            assert_eq!(
12184                v.get("error").and_then(|e| e.as_str()),
12185                Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
12186                "404 body must carry the session_expired discriminator: {v}"
12187            );
12188            assert!(
12189                v.get("retry")
12190                    .and_then(|e| e.as_str())
12191                    .map(|s| s == "re-initialize")
12192                    .unwrap_or(false),
12193                "404 body must instruct re-initialize: {v}"
12194            );
12195        });
12196        h.shutdown(&runtime);
12197    }
12198
12199    /// A `POST /mcp` carrying a `Mcp-Session-Id` that WAS assigned but
12200    /// has since been expired (we evict it directly from the store to
12201    /// simulate the TTL sweep) must surface the same 404 +
12202    /// `session_expired` discriminator. Distinct from the
12203    /// "unknown id" test above — same wire response, different cause.
12204    #[test]
12205    fn mcp_post_with_expired_session_id_returns_404() {
12206        let runtime = rt();
12207        let h = Harness::new(&runtime);
12208        let r = h.router.clone();
12209        let store = h.mcp_sessions.clone();
12210        runtime.block_on(async move {
12211            // First request to allocate a session id.
12212            let req1 = Request::builder()
12213                .method("POST")
12214                .uri("/mcp")
12215                .header("content-type", "application/json")
12216                .body(Body::from(
12217                    serde_json::to_vec(&json!({
12218                        "jsonrpc": "2.0",
12219                        "id": 104,
12220                        "method": "tools/list",
12221                    }))
12222                    .unwrap(),
12223                ))
12224                .unwrap();
12225            let resp1 = r.clone().oneshot(req1).await.unwrap();
12226            let assigned_id_str = resp1
12227                .headers()
12228                .get("mcp-session-id")
12229                .and_then(|v| v.to_str().ok())
12230                .map(|s| s.to_string())
12231                .expect("first response must carry mcp-session-id");
12232
12233            // Force-evict the session directly via the harness's
12234            // SessionStore clone. This is the moral equivalent of the
12235            // background sweep evicting an entry past TTL — same
12236            // observable from the wire (the handler's middleware sees
12237            // `SessionStore::get` return `None`). Driving the real
12238            // 30-min inactivity clock is not test-friendly.
12239            let parsed = crate::mcp_session::SessionId::parse(&assigned_id_str)
12240                .expect("just-assigned id must parse");
12241            assert!(store.delete(&parsed), "stored session must be deletable");
12242
12243            // Now the id is "stale" (no longer in the store) — same
12244            // observable as a TTL eviction.
12245            let req2 = Request::builder()
12246                .method("POST")
12247                .uri("/mcp")
12248                .header("content-type", "application/json")
12249                .header("mcp-session-id", &assigned_id_str)
12250                .body(Body::from(
12251                    serde_json::to_vec(&json!({
12252                        "jsonrpc": "2.0",
12253                        "id": 105,
12254                        "method": "tools/list",
12255                    }))
12256                    .unwrap(),
12257                ))
12258                .unwrap();
12259            let resp2 = r.oneshot(req2).await.unwrap();
12260            assert_eq!(resp2.status(), StatusCode::NOT_FOUND);
12261            let body_bytes = resp2.into_body().collect().await.unwrap().to_bytes();
12262            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
12263            assert_eq!(
12264                v.get("error").and_then(|e| e.as_str()),
12265                Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
12266                "expired-session 404 body must carry session_expired: {v}"
12267            );
12268        });
12269        h.shutdown(&runtime);
12270    }
12271
12272    /// v0.11.0 P2: `GET /mcp` REQUIRES an `Mcp-Session-Id` header. The
12273    /// GET stream is "attach to an existing session's notification
12274    /// channel" — there's no session-init story over GET (POST owns
12275    /// session creation). A GET without the header must return 404
12276    /// with the `session_expired` discriminator + `re-initialize`
12277    /// instruction, mirroring the unknown-id 404 wire shape so clients
12278    /// have a single recovery code path.
12279    ///
12280    /// Diverges deliberately from v0.11.0 P1's behaviour (which
12281    /// auto-created on GET) — see `docs/dev-log/0134-v0.11.0-p2-impl.md`
12282    /// for the rationale.
12283    #[test]
12284    fn mcp_get_without_session_id_returns_404() {
12285        let runtime = rt();
12286        let h = Harness::new(&runtime);
12287        let r = h.router.clone();
12288        runtime.block_on(async move {
12289            let req = Request::builder()
12290                .method("GET")
12291                .uri("/mcp")
12292                .header("accept", "text/event-stream")
12293                .body(Body::empty())
12294                .unwrap();
12295            let resp = r.oneshot(req).await.unwrap();
12296            assert_eq!(resp.status(), StatusCode::NOT_FOUND);
12297            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
12298            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
12299            assert_eq!(
12300                v.get("error").and_then(|e| e.as_str()),
12301                Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
12302                "GET /mcp without session id must carry session_expired: {v}"
12303            );
12304            assert_eq!(
12305                v.get("retry").and_then(|e| e.as_str()),
12306                Some("re-initialize"),
12307            );
12308        });
12309        h.shutdown(&runtime);
12310    }
12311
12312    // ---------------------------------------------------------------
12313    // v0.11.0 P2 — resumable /mcp GET stream + Last-Event-ID
12314    // ---------------------------------------------------------------
12315    //
12316    // These pin the v0.11.0 P2 wire contract for the resumable GET
12317    // stream: an `Mcp-Session-Id`-bound subscriber sees `event: init`
12318    // first, then any buffered replay events past `Last-Event-ID`,
12319    // then live broadcast events as they're published. The unit-test
12320    // half of the contract (publish_event monotonic + buffer cap) lives
12321    // in `crate::mcp_session::tests`.
12322
12323    /// Open the `/mcp` GET stream for one session id. Returns
12324    /// `(status, body)` where the body is the SSE frame stream.
12325    async fn open_mcp_get_stream(
12326        router: axum::Router,
12327        session_id: &str,
12328        last_event_id: Option<&str>,
12329    ) -> (StatusCode, axum::body::Body, axum::http::HeaderMap) {
12330        let mut builder = Request::builder()
12331            .method("GET")
12332            .uri("/mcp")
12333            .header("accept", "text/event-stream")
12334            .header(crate::mcp_session::MCP_SESSION_ID_HEADER, session_id);
12335        if let Some(leid) = last_event_id {
12336            builder = builder.header(crate::mcp_session::MCP_LAST_EVENT_ID_HEADER, leid);
12337        }
12338        let req = builder
12339            .header("content-length", "0")
12340            .body(Body::empty())
12341            .unwrap();
12342        let resp = router.oneshot(req).await.expect("oneshot");
12343        let status = resp.status();
12344        let headers = resp.headers().clone();
12345        let body = resp.into_body();
12346        (status, body, headers)
12347    }
12348
12349    /// Allocate one session via a POST so a follow-up GET can attach.
12350    /// Returns the assigned session id from the response header.
12351    async fn allocate_mcp_session(router: axum::Router) -> String {
12352        let req = Request::builder()
12353            .method("POST")
12354            .uri("/mcp")
12355            .header("content-type", "application/json")
12356            .body(Body::from(
12357                serde_json::to_vec(&json!({
12358                    "jsonrpc": "2.0",
12359                    "id": 1,
12360                    "method": "tools/list",
12361                }))
12362                .unwrap(),
12363            ))
12364            .unwrap();
12365        let resp = router.oneshot(req).await.expect("oneshot");
12366        assert_eq!(resp.status(), StatusCode::OK, "POST must allocate session");
12367        resp.headers()
12368            .get(crate::mcp_session::MCP_SESSION_ID_HEADER)
12369            .and_then(|v| v.to_str().ok())
12370            .map(|s| s.to_string())
12371            .expect("POST must echo Mcp-Session-Id")
12372    }
12373
12374    /// Look up the in-store `Arc<SessionState>` so a test can publish
12375    /// events directly onto the same record the GET handler subscribed
12376    /// to. Takes the [`SessionStore`] directly so callers can clone it
12377    /// out of the harness before moving the harness into the async
12378    /// block.
12379    fn session_state_for_test(
12380        store: &crate::mcp_session::SessionStore,
12381        session_id: &str,
12382    ) -> std::sync::Arc<crate::mcp_session::SessionState> {
12383        let parsed =
12384            crate::mcp_session::SessionId::parse(session_id).expect("test session id must parse");
12385        store.get(&parsed).expect("session must still be in store")
12386    }
12387
12388    /// GET `/mcp` against a session that's been force-evicted (TTL
12389    /// sweep) returns 404 with the `session_expired` discriminator —
12390    /// same wire shape as POST.
12391    #[test]
12392    fn mcp_get_with_expired_session_id_returns_404() {
12393        let runtime = rt();
12394        let h = Harness::new(&runtime);
12395        let r = h.router.clone();
12396        let store = h.mcp_sessions.clone();
12397        runtime.block_on(async move {
12398            let session_id = allocate_mcp_session(r.clone()).await;
12399            // Force-evict via the harness store handle.
12400            let parsed = crate::mcp_session::SessionId::parse(&session_id).unwrap();
12401            assert!(store.delete(&parsed));
12402            // Now GET against the stale id.
12403            let req = Request::builder()
12404                .method("GET")
12405                .uri("/mcp")
12406                .header("accept", "text/event-stream")
12407                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
12408                .body(Body::empty())
12409                .unwrap();
12410            let resp = r.oneshot(req).await.unwrap();
12411            assert_eq!(resp.status(), StatusCode::NOT_FOUND);
12412            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
12413            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
12414            assert_eq!(
12415                v.get("error").and_then(|e| e.as_str()),
12416                Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
12417            );
12418        });
12419        h.shutdown(&runtime);
12420    }
12421
12422    /// Happy-path subscribe: open `/mcp` with a freshly-allocated
12423    /// session id, expect `event: init` as the first frame with the
12424    /// session id echoed in both the response header AND the init
12425    /// payload.
12426    #[test]
12427    fn mcp_get_with_valid_session_id_subscribes() {
12428        let runtime = rt();
12429        let h = Harness::new(&runtime);
12430        let r = h.router.clone();
12431        runtime.block_on(async move {
12432            let session_id = allocate_mcp_session(r.clone()).await;
12433            let (status, mut body, headers) = open_mcp_get_stream(r, &session_id, None).await;
12434            assert_eq!(status, StatusCode::OK);
12435            // Response header echoes the session id.
12436            let echoed = headers
12437                .get(crate::mcp_session::MCP_SESSION_ID_HEADER)
12438                .and_then(|v| v.to_str().ok())
12439                .unwrap();
12440            assert_eq!(echoed, session_id);
12441            // First frame is the init event.
12442            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
12443                .await
12444                .expect("init event must arrive within 2s");
12445            assert_eq!(ev.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
12446            assert_eq!(ev.data["connected"].as_bool(), Some(true));
12447            assert_eq!(ev.data["session_id"].as_str(), Some(session_id.as_str()));
12448            // Init carries id "0" (reserved sentinel — the first real
12449            // publish_event allocates id 1).
12450            assert_eq!(ev.id.as_deref(), Some("0"));
12451        });
12452        h.shutdown(&runtime);
12453    }
12454
12455    /// Publish 5 events on the session, reconnect with
12456    /// `Last-Event-ID: 2`, observe `init` then events 3, 4, 5 (in
12457    /// order). Pins the resume-from-cursor contract.
12458    #[test]
12459    fn mcp_get_resumes_from_last_event_id() {
12460        let runtime = rt();
12461        let h = Harness::new(&runtime);
12462        let r = h.router.clone();
12463        let store = h.mcp_sessions.clone();
12464        runtime.block_on(async move {
12465            let session_id = allocate_mcp_session(r.clone()).await;
12466            let state = session_state_for_test(&store, &session_id);
12467            for i in 1..=5 {
12468                state.publish_event(crate::mcp_session::McpEventKind::Message, json!({"n": i}));
12469            }
12470            let (status, mut body, _) = open_mcp_get_stream(r, &session_id, Some("2")).await;
12471            assert_eq!(status, StatusCode::OK);
12472            // First frame is init.
12473            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
12474                .await
12475                .unwrap();
12476            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
12477            // Then events 3, 4, 5 in order.
12478            for expected_id in 3..=5 {
12479                let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
12480                    .await
12481                    .expect("replay event must arrive within 2s");
12482                assert_eq!(
12483                    ev.event,
12484                    crate::mcp_session::MCP_STREAM_EVENT_MESSAGE_NAME,
12485                    "expected replay of message event id {expected_id}, got {ev:?}",
12486                );
12487                assert_eq!(ev.id.as_deref(), Some(expected_id.to_string().as_str()));
12488                assert_eq!(ev.data["n"].as_u64(), Some(expected_id));
12489            }
12490        });
12491        h.shutdown(&runtime);
12492    }
12493
12494    /// Publish past the broadcast buffer's capacity (300 events) then
12495    /// reconnect with `Last-Event-ID: 0` (the sentinel for "I just
12496    /// joined and missed everything since event 1"). Observe `event:
12497    /// init`, then a synthetic `event: lagged` describing the gap,
12498    /// then the tail of the buffer.
12499    #[test]
12500    fn mcp_get_emits_lagged_when_last_event_id_too_old() {
12501        let runtime = rt();
12502        let h = Harness::new(&runtime);
12503        let r = h.router.clone();
12504        let store = h.mcp_sessions.clone();
12505        runtime.block_on(async move {
12506            let session_id = allocate_mcp_session(r.clone()).await;
12507            let state = session_state_for_test(&store, &session_id);
12508            // Publish 300 events — buffer cap is 256, so events 1..=44
12509            // get evicted (oldest retained id = 45).
12510            for _ in 0..300 {
12511                state.publish_event(crate::mcp_session::McpEventKind::Message, json!({}));
12512            }
12513            // Last-Event-ID: 1 — claim we've only seen event 1, but
12514            // event 2 (and 3..=44) are gone from the buffer.
12515            let (status, mut body, _) = open_mcp_get_stream(r, &session_id, Some("1")).await;
12516            assert_eq!(status, StatusCode::OK);
12517            // First frame: init.
12518            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
12519                .await
12520                .unwrap();
12521            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
12522            // Second frame: lagged (synthetic) with id 0.
12523            let lagged = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
12524                .await
12525                .expect("lagged event must arrive within 2s");
12526            assert_eq!(
12527                lagged.event,
12528                crate::mcp_session::MCP_STREAM_EVENT_LAGGED_NAME,
12529                "expected `event: lagged` after Last-Event-ID before buffer",
12530            );
12531            assert_eq!(lagged.id.as_deref(), Some("0"));
12532            assert!(
12533                lagged.data["dropped"].as_u64().unwrap_or(0) > 0,
12534                "lagged event must carry a non-zero `dropped` count: {:?}",
12535                lagged.data,
12536            );
12537        });
12538        h.shutdown(&runtime);
12539    }
12540
12541    /// CORS preflight (OPTIONS) with `Access-Control-Request-Headers:
12542    /// last-event-id` must succeed and the `last-event-id` header must
12543    /// appear in `Access-Control-Allow-Headers`. Without this, a
12544    /// browser reconnecting an SSE stream with `Last-Event-ID:` fails
12545    /// the preflight before the actual GET lands.
12546    #[test]
12547    fn cors_preflight_allows_last_event_id_header() {
12548        let runtime = rt();
12549        let h = Harness::new(&runtime);
12550        let r = h.router.clone();
12551        runtime.block_on(async move {
12552            let req = Request::builder()
12553                .method("OPTIONS")
12554                .uri("/mcp")
12555                .header("origin", "http://localhost:5173")
12556                .header("access-control-request-method", "GET")
12557                .header(
12558                    "access-control-request-headers",
12559                    "last-event-id,mcp-session-id",
12560                )
12561                .body(Body::empty())
12562                .unwrap();
12563            let resp = r.oneshot(req).await.unwrap();
12564            assert!(
12565                resp.status().is_success() || resp.status() == StatusCode::NO_CONTENT,
12566                "preflight must succeed, got: {}",
12567                resp.status(),
12568            );
12569            let allow = resp
12570                .headers()
12571                .get("access-control-allow-headers")
12572                .and_then(|h| h.to_str().ok())
12573                .map(|s| s.to_ascii_lowercase())
12574                .unwrap_or_default();
12575            assert!(
12576                allow.contains("last-event-id"),
12577                "preflight must allow `last-event-id`; allow-headers = {allow:?}",
12578            );
12579            assert!(
12580                allow.contains("mcp-session-id"),
12581                "preflight must allow `mcp-session-id` too; allow-headers = {allow:?}",
12582            );
12583        });
12584        h.shutdown(&runtime);
12585    }
12586
12587    /// Heartbeat cadence: with a short interval, the stream emits a
12588    /// typed `event: heartbeat` after the init event. The production
12589    /// cadence is [`MCP_STREAM_HEARTBEAT_SECS`] (30s); the test
12590    /// exercises `build_mcp_session_stream` directly with a 1-second
12591    /// interval so we don't burn 30s of CI wall time.
12592    #[test]
12593    fn mcp_get_heartbeats_after_init() {
12594        let runtime = rt();
12595        let h = Harness::new(&runtime);
12596        runtime.block_on(async move {
12597            let state = std::sync::Arc::new(crate::mcp_session::SessionState::new(
12598                solo_core::TenantId::default_tenant(),
12599                None,
12600            ));
12601            let session_id = crate::mcp_session::SessionId::new();
12602            let stream = build_mcp_session_stream(
12603                state,
12604                session_id.clone(),
12605                "default".to_string(),
12606                0,
12607                1, // 1-second heartbeat for the test
12608            );
12609            // Pull frames off the stream. Should see init then
12610            // (with no live events) a heartbeat within ~1.5s.
12611            use futures::StreamExt;
12612            let mut stream = std::pin::pin!(stream);
12613            let init_ev = tokio::time::timeout(std::time::Duration::from_secs(2), stream.next())
12614                .await
12615                .expect("init must arrive within 2s")
12616                .expect("stream must yield init");
12617            // Rendering the Event is opaque; we don't introspect it
12618            // here — the wire-format integration test
12619            // `mcp_get_with_valid_session_id_subscribes` covers that.
12620            // This test pins that a SECOND frame lands within the
12621            // heartbeat window. Drop the init frame.
12622            drop(init_ev);
12623            let hb = tokio::time::timeout(std::time::Duration::from_secs(3), stream.next())
12624                .await
12625                .expect("heartbeat must arrive within ~3s")
12626                .expect("stream must yield heartbeat");
12627            // Same opacity — we observe presence, not content. The
12628            // integration-level test
12629            // `mcp_get_with_valid_session_id_subscribes` covers wire
12630            // content.
12631            drop(hb);
12632        });
12633        h.shutdown(&runtime);
12634    }
12635
12636    /// v0.11.0 P3: `memory_ingest_document` emits the first two phase
12637    /// events (parsed, chunked) BEFORE the writer-actor call, so they
12638    /// fire even when the underlying writer has no embedder configured.
12639    /// This pins the upstream half of the 4-phase ingest progress
12640    /// taxonomy without needing a fully-equipped writer harness — the
12641    /// post-writer phases (embedded, inserted) are pinned indirectly
12642    /// by the `MCP_NOTIFICATION_PROGRESS_METHOD` grep-ability and by
12643    /// the dispatch_tests-level progress-emission tests for the other
12644    /// two long-running tools (search_docs / remember_batch).
12645    #[test]
12646    fn mcp_http_ingest_document_emits_parsed_and_chunked_progress_events() {
12647        let runtime = rt();
12648        let h = Harness::new(&runtime);
12649        let r = h.router.clone();
12650        let store = h.mcp_sessions.clone();
12651        runtime.block_on(async move {
12652            let session_id = allocate_mcp_session(r.clone()).await;
12653            let state = session_state_for_test(&store, &session_id);
12654            let mut rx = state.subscribe_events();
12655            // Write a `.txt` so the parser doesn't reject before the
12656            // first progress event fires — handle_ingest_document
12657            // emits `parsed` BEFORE the writer call but AFTER the
12658            // empty-path guard; both pre-writer phases fire regardless
12659            // of writer-side embedder configuration.
12660            let tmpdir = tempfile::TempDir::new().unwrap();
12661            let tmpfile = tmpdir.path().join("ingest-progress.txt");
12662            std::fs::write(&tmpfile, b"hello world progress test").unwrap();
12663            let body = json!({
12664                "jsonrpc": "2.0",
12665                "id": 2,
12666                "method": "tools/call",
12667                "params": {
12668                    "name": "memory_ingest_document",
12669                    "arguments": { "path": tmpfile.to_string_lossy() },
12670                    "_meta": { "progressToken": "ingest-tok" },
12671                },
12672            });
12673            let req = Request::builder()
12674                .method("POST")
12675                .uri("/mcp")
12676                .header("content-type", "application/json")
12677                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
12678                .body(Body::from(serde_json::to_vec(&body).unwrap()))
12679                .unwrap();
12680            let resp = r.clone().oneshot(req).await.expect("oneshot");
12681            assert_eq!(resp.status(), StatusCode::OK);
12682            let _ = resp.into_body().collect().await.unwrap().to_bytes();
12683            let mut events = Vec::new();
12684            while let Ok(ev) = rx.try_recv() {
12685                events.push(ev);
12686            }
12687            // We expect AT LEAST the 2 pre-writer phase events. In a
12688            // fully-equipped harness the writer would succeed and the
12689            // post-writer phases (embedded + inserted) would also fire;
12690            // here we pin the pre-writer half + the spec envelope shape.
12691            assert!(
12692                events.len() >= 2,
12693                "expected at least 2 progress events (parsed + chunked), got {}: {events:?}",
12694                events.len()
12695            );
12696            // Phase 1 = "parsed"; phase 2 = "chunked"; both carry
12697            // total=4 and progressToken="ingest-tok".
12698            assert_eq!(events[0].data["params"]["progress"], json!(1));
12699            assert_eq!(events[0].data["params"]["message"], json!("parsed"));
12700            assert_eq!(events[1].data["params"]["progress"], json!(2));
12701            assert_eq!(events[1].data["params"]["message"], json!("chunked"));
12702            for ev in &events {
12703                assert_eq!(ev.event, crate::mcp_session::McpEventKind::Progress,);
12704                assert_eq!(
12705                    ev.data["method"],
12706                    json!(crate::mcp_progress::MCP_NOTIFICATION_PROGRESS_METHOD)
12707                );
12708                assert_eq!(ev.data["params"]["progressToken"], json!("ingest-tok"));
12709                assert_eq!(ev.data["params"]["total"], json!(4));
12710            }
12711        });
12712        h.shutdown(&runtime);
12713    }
12714
12715    /// v0.11.0 P3: end-to-end progress event roundtrip — POST a
12716    /// `tools/call` carrying `_meta.progressToken`, then reconnect via
12717    /// `GET /mcp` with a `Last-Event-ID` that triggers buffer replay.
12718    /// Confirms the wire path:
12719    /// `tools/call params._meta.progressToken` → ProgressReporter →
12720    /// SessionState.publish_event → replay buffer → GET SSE replay
12721    /// drain → client receives spec-shape envelope.
12722    ///
12723    /// `Last-Event-ID: 0` is treated as "brand new subscriber, no
12724    /// replay" per the v0.11.0 P2 contract — so we drive a non-zero
12725    /// `Last-Event-ID` smaller than every event id by first force-
12726    /// publishing one synthetic seed event (id 1), then issuing the
12727    /// real `tools/call` (which publishes 3 progress events with
12728    /// ids 2..=4), then GET with `Last-Event-ID: 1` to replay
12729    /// exactly the progress trio.
12730    #[test]
12731    fn mcp_http_progress_event_subscribers_receive_via_get_mcp_stream() {
12732        let runtime = rt();
12733        let h = Harness::new(&runtime);
12734        let r = h.router.clone();
12735        let store = h.mcp_sessions.clone();
12736        runtime.block_on(async move {
12737            // 1. Allocate a session via an initial POST.
12738            let session_id = allocate_mcp_session(r.clone()).await;
12739            // 2. Seed one synthetic event (id 1) so the buffer is
12740            //    non-empty before the real progress events. The GET
12741            //    handler's replay path only fires when last_event_id
12742            //    > 0; we'll pass Last-Event-ID: 1 to skip the seed and
12743            //    replay the progress events that follow.
12744            let state = session_state_for_test(&store, &session_id);
12745            state.publish_event(
12746                crate::mcp_session::McpEventKind::Message,
12747                json!({"seed": true}),
12748            );
12749            // 3. POST a `memory_search_docs` tools/call carrying
12750            //    `_meta.progressToken` (well above the top_k threshold
12751            //    so progress IS emitted). The query returns empty hits
12752            //    in the harness — what matters here is that the 3
12753            //    progress events fire as side effects of the call.
12754            let body = json!({
12755                "jsonrpc": "2.0",
12756                "id": 2,
12757                "method": "tools/call",
12758                "params": {
12759                    "name": "memory_search_docs",
12760                    "arguments": { "query": "anything", "limit": 150 },
12761                    "_meta": { "progressToken": "progress-roundtrip" },
12762                },
12763            });
12764            let req = Request::builder()
12765                .method("POST")
12766                .uri("/mcp")
12767                .header("content-type", "application/json")
12768                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
12769                .body(Body::from(serde_json::to_vec(&body).unwrap()))
12770                .unwrap();
12771            let resp = r.clone().oneshot(req).await.expect("oneshot");
12772            assert_eq!(resp.status(), StatusCode::OK);
12773            // Drain the POST response so the future completes before
12774            // we open the GET stream.
12775            let _ = resp.into_body().collect().await.unwrap().to_bytes();
12776            // 4. Open the GET stream with Last-Event-ID: 1 — replay
12777            //    every event past the seed.
12778            let (status, mut stream_body, _) = open_mcp_get_stream(r, &session_id, Some("1")).await;
12779            assert_eq!(status, StatusCode::OK);
12780            // First frame: init (id 0, reserved sentinel).
12781            let init = read_one_sse_event(&mut stream_body, std::time::Duration::from_secs(2))
12782                .await
12783                .expect("init must arrive within 2s");
12784            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
12785            // Then 3 progress events (the search_docs handler emits 3
12786            // when top_k > 100). Collect them and assert the spec
12787            // envelope shape.
12788            for expected_progress in 1u64..=3u64 {
12789                let ev = read_one_sse_event(&mut stream_body, std::time::Duration::from_secs(2))
12790                    .await
12791                    .expect("progress event must arrive within 2s");
12792                assert_eq!(
12793                    ev.event,
12794                    crate::mcp_session::MCP_STREAM_EVENT_PROGRESS_NAME,
12795                    "expected progress event #{expected_progress}, got {ev:?}",
12796                );
12797                // Spec-shape envelope: jsonrpc + method + params{progressToken, progress, total}.
12798                assert_eq!(ev.data["jsonrpc"], json!("2.0"));
12799                assert_eq!(
12800                    ev.data["method"],
12801                    json!(crate::mcp_progress::MCP_NOTIFICATION_PROGRESS_METHOD)
12802                );
12803                assert_eq!(
12804                    ev.data["params"]["progressToken"],
12805                    json!("progress-roundtrip")
12806                );
12807                assert_eq!(ev.data["params"]["progress"], json!(expected_progress));
12808                assert_eq!(ev.data["params"]["total"], json!(3));
12809            }
12810        });
12811        h.shutdown(&runtime);
12812    }
12813
12814    /// `initialize` returns the `{name: "solo", version: <crate
12815    /// version>}` server-info pinned by the stdio invariant test
12816    /// `server_info_identity_is_solo_not_rmcp_or_solo_api`. Sanity
12817    /// check that the v0.10.2 HTTP transport doesn't drift away from
12818    /// the stdio identity.
12819    #[test]
12820    fn mcp_http_initialize_returns_solo_server_info() {
12821        let runtime = rt();
12822        let h = Harness::new(&runtime);
12823        let r = h.router.clone();
12824        runtime.block_on(async move {
12825            let req = json!({
12826                "jsonrpc": "2.0",
12827                "id": 7,
12828                "method": "initialize",
12829                "params": {
12830                    "protocolVersion": "2024-11-05",
12831                    "capabilities": {},
12832                    "clientInfo": { "name": "solo-http-test", "version": "0.0.0" },
12833                },
12834            });
12835            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
12836            assert_eq!(status, StatusCode::OK);
12837            assert_eq!(
12838                body.pointer("/result/serverInfo/name")
12839                    .and_then(|v| v.as_str()),
12840                Some("solo"),
12841                "serverInfo.name must be `solo`, not `solo-api` or `rmcp`; got: {body}"
12842            );
12843            // `protocolVersion` is the static value the dispatcher
12844            // emits today (2024-11-05). The stdio loop emits rmcp's
12845            // own default — we cross-check those two stay aligned in
12846            // the v0.10.3+ session work; for v0.10.2 we just pin the
12847            // HTTP-side value.
12848            assert_eq!(
12849                body.pointer("/result/protocolVersion")
12850                    .and_then(|v| v.as_str()),
12851                Some("2024-11-05"),
12852            );
12853        });
12854        h.shutdown(&runtime);
12855    }
12856
12857    // ----------------------------------------------------------------
12858    // v0.11.0 P4 — notifications/message bridge from InvalidateEvent
12859    // ----------------------------------------------------------------
12860
12861    /// v0.11.0 P4: a fresh POST /mcp (no session id) causes the per-
12862    /// session invalidate bridge to be spawned. Pin by firing an
12863    /// invalidate on the harness's broadcast sender AFTER the session
12864    /// is allocated and asserting the session's own event channel
12865    /// receives an MCP `notifications/message` event.
12866    #[test]
12867    fn session_subscribes_to_tenant_invalidate_on_creation() {
12868        let runtime = rt();
12869        let h = Harness::new(&runtime);
12870        let r = h.router.clone();
12871        let store = h.mcp_sessions.clone();
12872        let sender = h.invalidate_sender();
12873        runtime.block_on(async move {
12874            // Allocate session — POST handler spawns the bridge.
12875            let session_id = allocate_mcp_session(r).await;
12876            let state = session_state_for_test(&store, &session_id);
12877            let mut rx = state.subscribe_events();
12878            // Fire one invalidate on the tenant's broadcast.
12879            sender
12880                .send(InvalidateEvent {
12881                    reason: "memory.remember".to_string(),
12882                    tenant_id: "default".to_string(),
12883                    ts_ms: 1_715_625_600_000,
12884                    kind: "episode".to_string(),
12885                })
12886                .expect("at least one subscriber (the bridge)");
12887            // Bridge forwards it to the session as an MCP Message.
12888            let received = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
12889                .await
12890                .expect("bridge must forward invalidate within 2s")
12891                .expect("session receiver must observe published event");
12892            assert_eq!(received.event, crate::mcp_session::McpEventKind::Message);
12893            assert_eq!(
12894                received.data["method"].as_str(),
12895                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_METHOD),
12896            );
12897        });
12898        h.shutdown(&runtime);
12899    }
12900
12901    /// v0.11.0 P4: pin the exact envelope shape — `jsonrpc=2.0`,
12902    /// `method=notifications/message`, `params.{level,logger,data,details}`.
12903    /// One full round-trip through the bridge so a future refactor
12904    /// that changes the wire format trips this test.
12905    #[test]
12906    fn invalidate_event_translates_to_mcp_notifications_message() {
12907        let runtime = rt();
12908        let h = Harness::new(&runtime);
12909        let r = h.router.clone();
12910        let store = h.mcp_sessions.clone();
12911        let sender = h.invalidate_sender();
12912        runtime.block_on(async move {
12913            let session_id = allocate_mcp_session(r).await;
12914            let state = session_state_for_test(&store, &session_id);
12915            let mut rx = state.subscribe_events();
12916            sender
12917                .send(InvalidateEvent {
12918                    reason: "memory.ingest_document".to_string(),
12919                    tenant_id: "default".to_string(),
12920                    ts_ms: 1_715_625_999_999,
12921                    kind: "document".to_string(),
12922                })
12923                .expect("at least one subscriber");
12924            let received = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
12925                .await
12926                .expect("forward within 2s")
12927                .expect("session must receive event");
12928            // Envelope shape.
12929            assert_eq!(received.data["jsonrpc"].as_str(), Some("2.0"));
12930            assert_eq!(
12931                received.data["method"].as_str(),
12932                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_METHOD),
12933            );
12934            let params = &received.data["params"];
12935            assert_eq!(
12936                params["level"].as_str(),
12937                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_LEVEL),
12938            );
12939            assert_eq!(
12940                params["logger"].as_str(),
12941                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_LOGGER),
12942            );
12943            // document kind maps to documents_updated.
12944            assert_eq!(
12945                params["data"].as_str(),
12946                Some(crate::mcp_notify::MCP_NOTIFICATION_DATA_DOCUMENTS_UPDATED),
12947            );
12948            // details preserves the structured original event.
12949            assert_eq!(
12950                params["details"]["reason"].as_str(),
12951                Some("memory.ingest_document"),
12952            );
12953            assert_eq!(params["details"]["kind"].as_str(), Some("document"),);
12954            assert_eq!(params["details"]["ts_ms"].as_i64(), Some(1_715_625_999_999),);
12955        });
12956        h.shutdown(&runtime);
12957    }
12958
12959    /// v0.11.0 P4: two sessions exist; each has its own bridge. An
12960    /// invalidate fires once on the (shared, single-tenant) broadcast
12961    /// and BOTH sessions receive it. Pins that the bridge is correctly
12962    /// per-session-scoped: it doesn't leak to a wrong session AND it
12963    /// doesn't fail to fan out to all sessions of the same tenant.
12964    ///
12965    /// The harness is single-tenant by design, so the "wrong tenant
12966    /// doesn't receive" half is structurally guaranteed (different
12967    /// tenants would have different `invalidate_sender`s — the
12968    /// `mcp_notify` unit tests pin the bridge wiring against a fake
12969    /// channel directly). This integration test pins the
12970    /// per-session-of-same-tenant fan-out behaviour.
12971    #[test]
12972    fn invalidate_event_published_to_correct_session_only() {
12973        let runtime = rt();
12974        let h = Harness::new(&runtime);
12975        let r = h.router.clone();
12976        let store = h.mcp_sessions.clone();
12977        let sender = h.invalidate_sender();
12978        runtime.block_on(async move {
12979            // Allocate two distinct sessions.
12980            let session_id_a = allocate_mcp_session(r.clone()).await;
12981            let session_id_b = allocate_mcp_session(r).await;
12982            assert_ne!(session_id_a, session_id_b);
12983            let state_a = session_state_for_test(&store, &session_id_a);
12984            let state_b = session_state_for_test(&store, &session_id_b);
12985            let mut rx_a = state_a.subscribe_events();
12986            let mut rx_b = state_b.subscribe_events();
12987            // Fire one invalidate.
12988            sender
12989                .send(InvalidateEvent {
12990                    reason: "memory.consolidate".to_string(),
12991                    tenant_id: "default".to_string(),
12992                    ts_ms: 1_715_625_600_000,
12993                    kind: "cluster".to_string(),
12994                })
12995                .expect("at least one subscriber");
12996            // Both sessions' bridges receive it independently.
12997            let a = tokio::time::timeout(std::time::Duration::from_secs(2), rx_a.recv())
12998                .await
12999                .expect("session A receives within 2s")
13000                .expect("session A receiver alive");
13001            let b = tokio::time::timeout(std::time::Duration::from_secs(2), rx_b.recv())
13002                .await
13003                .expect("session B receives within 2s")
13004                .expect("session B receiver alive");
13005            for evt in [&a, &b] {
13006                assert_eq!(evt.event, crate::mcp_session::McpEventKind::Message);
13007                assert_eq!(
13008                    evt.data["params"]["data"].as_str(),
13009                    Some(crate::mcp_notify::MCP_NOTIFICATION_DATA_CONSOLIDATION_UPDATED),
13010                );
13011            }
13012        });
13013        h.shutdown(&runtime);
13014    }
13015
13016    /// v0.11.0 P4: full GET-stream integration. A POST opens a session
13017    /// AND spawns its bridge; an invalidate fires on the tenant's
13018    /// broadcast; a GET subscriber reading the SSE wire format
13019    /// observes the `event: message` SSE frame carrying the spec-shape
13020    /// `notifications/message` envelope.
13021    ///
13022    /// Uses the `Last-Event-ID` resume path with id 0 (sentinel —
13023    /// "I'm a new subscriber, no replay"); the invalidate fires AFTER
13024    /// the GET opens so the live broadcast receiver picks it up.
13025    #[test]
13026    fn mcp_get_subscriber_receives_notifications_message_event() {
13027        let runtime = rt();
13028        let h = Harness::new(&runtime);
13029        let r = h.router.clone();
13030        let sender = h.invalidate_sender();
13031        runtime.block_on(async move {
13032            let session_id = allocate_mcp_session(r.clone()).await;
13033            // Open the GET stream first so the live broadcast receiver
13034            // is attached BEFORE the invalidate fires.
13035            let (status, mut body, _) = open_mcp_get_stream(r, &session_id, None).await;
13036            assert_eq!(status, StatusCode::OK);
13037            // Drain the init frame.
13038            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
13039                .await
13040                .expect("init event must arrive within 2s");
13041            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME,);
13042            // Now fire the invalidate.
13043            sender
13044                .send(InvalidateEvent {
13045                    reason: "memory.triples_extract".to_string(),
13046                    tenant_id: "default".to_string(),
13047                    ts_ms: 1_715_625_600_000,
13048                    kind: "triple".to_string(),
13049                })
13050                .expect("send must succeed");
13051            // Bridge forwards → SessionState.publish_event → broadcast
13052            // → GET stream consumer → SSE wire frame.
13053            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
13054                .await
13055                .expect("message event must arrive within 2s");
13056            assert_eq!(ev.event, crate::mcp_session::MCP_STREAM_EVENT_MESSAGE_NAME,);
13057            assert_eq!(ev.data["jsonrpc"].as_str(), Some("2.0"));
13058            assert_eq!(
13059                ev.data["method"].as_str(),
13060                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_METHOD),
13061            );
13062            assert_eq!(
13063                ev.data["params"]["data"].as_str(),
13064                Some(crate::mcp_notify::MCP_NOTIFICATION_DATA_GRAPH_UPDATED),
13065            );
13066            assert_eq!(
13067                ev.data["params"]["details"]["reason"].as_str(),
13068                Some("memory.triples_extract"),
13069            );
13070        });
13071        h.shutdown(&runtime);
13072    }
13073}
13074
/// Table-driven checks for `is_localhost_origin`: each test feeds a
/// list of `Origin` header values and expects all of them to be
/// accepted, or all of them to be rejected.
#[cfg(test)]
mod cors_tests {
    use super::is_localhost_origin;

    /// Panics unless `is_localhost_origin` accepts every listed origin.
    fn assert_all_accepted(origins: &[&str]) {
        for &origin in origins {
            assert!(
                is_localhost_origin(origin),
                "origin {origin:?} should be accepted"
            );
        }
    }

    /// Panics unless `is_localhost_origin` rejects every listed origin.
    fn assert_all_rejected(origins: &[&str]) {
        for &origin in origins {
            assert!(
                !is_localhost_origin(origin),
                "origin {origin:?} should be rejected"
            );
        }
    }

    #[test]
    fn accepts_canonical_localhost_origins() {
        assert_all_accepted(&[
            "http://localhost",
            "http://localhost:3000",
            "https://localhost:8443",
            "http://127.0.0.1",
            "http://127.0.0.1:5173",
            "http://[::1]",
            "http://[::1]:8080",
        ]);
    }

    #[test]
    fn rejects_remote_origins() {
        assert_all_rejected(&[
            "http://example.com",
            "https://malicious.example",
            "http://192.168.1.5",
            "http://10.0.0.1",
        ]);
    }

    #[test]
    fn rejects_dns_rebinding_tricks() {
        // nip.io and friends: DNS names that resolve to 127.0.0.1
        // while the Origin header still carries the public-DNS name.
        // Rejecting these closes the rebinding-via-Origin gap.
        assert_all_rejected(&[
            "http://127.0.0.1.nip.io",
            "http://localhost.evil.com",
            "http://evil.localhost",
        ]);
    }

    #[test]
    fn rejects_non_http_schemes() {
        assert_all_rejected(&["file:///", "ws://localhost:3000", "javascript:alert(1)"]);
    }

    #[test]
    fn rejects_malformed() {
        assert_all_rejected(&["", "localhost", "//localhost"]);
    }
}