Skip to main content

solo_api/
http.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! HTTP/JSON transport for Solo. Local-only by default — binds to
4//! `127.0.0.1:<port>` and serves the same operations the MCP server
5//! exposes:
6//!
7//! Episode operations:
8//!   - `POST /memory`                — remember (body: { content, source_type?, source_id? })
9//!   - `POST /memory/search`         — recall  (body: { query, limit? })
10//!   - `GET  /memory/{id}`           — inspect
11//!   - `DELETE /memory/{id}?reason=…` — forget
12//!
13//! Maintenance:
14//!   - `POST /memory/consolidate`    — trigger a consolidation pass
15//!   - `POST /backup`                — encrypted online backup
16//!
17//! Derived-layer (v0.4.0+; queries against the Steward's outputs):
18//!   - `GET  /memory/themes?window_days=N&limit=K`
19//!   - `GET  /memory/facts_about?subject=X&predicate=Y&since_ms=N&until_ms=N&include_as_object=B&limit=K`
20//!   - `GET  /memory/contradictions?limit=K`
21//!   - `GET  /memory/clusters/{cluster_id}?full_content=true` (v0.5.0+)
22//!
23//! Document operations (v0.7.0+):
24//!   - `POST   /memory/documents`               — ingest a file
25//!   - `POST   /memory/documents/search`        — vector search over chunks
26//!   - `GET    /memory/documents`               — paginate documents
27//!   - `GET    /memory/documents/{id}`          — inspect one document
28//!   - `DELETE /memory/documents/{id}`          — soft-delete a document
29//!
30//! Auth is optional at this layer. With no `AuthConfig` the server runs
31//! unauthenticated: the threat model is local-machine single-user, and binding to
32//! `127.0.0.1` keeps the surface off the LAN. Bearer-token and OIDC auth are opt-in.
33//!
34//! ## Lifecycle
35//!
36//! `serve_http(addr, state, bearer_token, shutdown)` binds to `addr`, runs axum with
37//! `with_graceful_shutdown(shutdown)`, returns when shutdown fires or
38//! the listener errors. `solo http-serve` invokes this from inside a
39//! `OneShotContext`, so writer + reader pool + lockfile stay live for
40//! the server's lifetime and clean up properly afterwards.
41
42use std::convert::Infallible;
43use std::net::SocketAddr;
44use std::str::FromStr;
45use std::sync::Arc;
46use std::time::Duration;
47
48use axum::extract::{FromRequestParts, Path, Query, State};
49use axum::http::request::Parts;
50use axum::http::{HeaderValue, Method, StatusCode};
51use axum::response::sse::{Event, KeepAlive, Sse};
52use axum::response::{IntoResponse, Response};
53use axum::routing::{get, post};
54use axum::{Json, Router};
55use futures::Stream;
56use serde::{Deserialize, Serialize};
57use solo_core::{
58    Confidence, DocumentId, EncodingContext, Episode, InvalidateEvent, MemoryId, TenantId,
59    Tier,
60};
61use solo_storage::{TenantHandle, TenantRegistry};
62use tokio::sync::broadcast;
63use tower_http::cors::{AllowOrigin, CorsLayer};
64use tower_http::trace::TraceLayer;
65
66use crate::auth::{AuthConfig, AuthenticatedPrincipal, middleware::AuthValidator};
67
68/// HTTP-side application state. v0.8.0 P2 swapped per-handler `WriteHandle
69/// + ReaderPool + ...` for a `TenantRegistry` that resolves tenant on each
70/// request via the `X-Solo-Tenant` header (default tenant if absent).
71#[derive(Clone)]
72pub struct SoloHttpState {
73    /// Multi-tenant registry. Lazy-loads tenants on first request.
74    pub registry: Arc<TenantRegistry>,
75    /// Default tenant used when the `X-Solo-Tenant` header is absent.
76    /// Typically `TenantId::default_tenant()`.
77    pub default_tenant: TenantId,
78    /// Read-path aliases for the canonical `"user"` subject. Sourced
79    /// from `solo.config.toml` `[identity] user_aliases`; threaded
80    /// through to `solo_query::facts_about` so a query for `"alex"`
81    /// also surfaces rows historically extracted as `"user"`. Empty
82    /// vec = behave as today. Wrapped in `Arc` so handler `clone()`s
83    /// stay cheap. v0.5.0 Priority 1 sub-step 1C.
84    pub user_aliases: Arc<Vec<String>>,
85    /// v0.11.0 P1: MCP `Mcp-Session-Id` session store. In-memory,
86    /// TTL-bounded (30 min inactivity / 4 hr absolute). The middleware
87    /// on the `/mcp` route validates request headers against this
88    /// store; the POST handler creates new entries on the first
89    /// request without a session id. See
90    /// `crates/solo-api/src/mcp_session.rs` +
91    /// `docs/dev-log/0132-v0.11.0-implementation-plan.md` §3 Decision A.
92    pub mcp_sessions: crate::mcp_session::SessionStore,
93}
94
/// Name of the optional request header that selects the target tenant.
/// When it is missing, `state.default_tenant` is used instead.
pub const TENANT_HEADER: &str = "x-solo-tenant";
98
99/// Axum extractor that resolves the request's target tenant, then
100/// lazy-opens the tenant via the registry.
101///
102/// Resolution order (v0.8.0 P3):
103///   1. `AuthenticatedPrincipal.tenant_claim` from request extensions —
104///      set by the auth middleware. In OIDC mode this is the validated
105///      value of the configured custom claim (default `solo_tenant`);
106///      in bearer mode this is the daemon's default tenant.
107///   2. `X-Solo-Tenant` header — falls back to this when no
108///      authenticated principal is on the request (unauthenticated
109///      loopback deployments — the default).
110///   3. `state.default_tenant` when neither is present.
111///
112/// Bad header values → 400. Lazy-open failures → 500 unless the failure
113/// kind is `NotFound` (unknown tenant id) → 404.
114pub struct TenantExtractor(pub Arc<TenantHandle>);
115
116impl<S> FromRequestParts<S> for TenantExtractor
117where
118    SoloHttpState: FromRef<S>,
119    S: Send + Sync,
120{
121    type Rejection = ApiError;
122
123    async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
124        let state = SoloHttpState::from_ref(state);
125        // Order: (1) principal.tenant_claim (set by auth middleware),
126        // (2) X-Solo-Tenant header, (3) state.default_tenant.
127        //
128        // The principal wins because in OIDC mode the JWT is the source
129        // of truth — letting the header override an OIDC claim would
130        // be a tenant-impersonation hole.
131        let resolved = if let Some(principal) = parts.extensions.get::<AuthenticatedPrincipal>()
132            && let Some(claim) = principal.tenant_claim.clone()
133        {
134            claim
135        } else {
136            match parts.headers.get(TENANT_HEADER) {
137                None => state.default_tenant.clone(),
138                Some(raw) => {
139                    let s = raw.to_str().map_err(|e| {
140                        ApiError::bad_request(format!(
141                            "{TENANT_HEADER}: header value must be ASCII ({e})"
142                        ))
143                    })?;
144                    TenantId::new(s.to_string()).map_err(|e| {
145                        ApiError::bad_request(format!("{TENANT_HEADER}: invalid tenant id: {e}"))
146                    })?
147                }
148            }
149        };
150        let handle = state.registry.get_or_open(&resolved).await.map_err(|e| {
151            // Map NotFound → 404; everything else → 500.
152            use solo_core::Error;
153            match &e {
154                Error::NotFound(_) => ApiError::not_found(e.to_string()),
155                Error::InvalidInput(_) => ApiError::bad_request(e.to_string()),
156                _ => ApiError::internal(e.to_string()),
157            }
158        })?;
159        Ok(TenantExtractor(handle))
160    }
161}
162
163use axum::extract::FromRef;
164
165/// v0.8.0 P4: extractor that pulls the authenticated principal's
166/// `subject` (JWT `sub` or `"bearer"`) out of request extensions for the
167/// audit log. `None` when no `AuthenticatedPrincipal` is present
168/// (unauthenticated loopback deployments).
169pub struct AuditPrincipal(pub Option<String>);
170
171impl<S> FromRequestParts<S> for AuditPrincipal
172where
173    S: Send + Sync,
174{
175    type Rejection = std::convert::Infallible;
176
177    async fn from_request_parts(
178        parts: &mut Parts,
179        _state: &S,
180    ) -> Result<Self, Self::Rejection> {
181        Ok(AuditPrincipal(
182            parts
183                .extensions
184                .get::<AuthenticatedPrincipal>()
185                .map(|p| p.subject.clone()),
186        ))
187    }
188}
189
190/// v0.10.0: extractor that lifts the full `AuthenticatedPrincipal` out
191/// of request extensions for the `/v1/tenants` handler. Distinct from
192/// `AuditPrincipal` (which only carries `subject: Option<String>`) — the
193/// tenant-list handler needs the `tenant_claim` and `claims` fields to
194/// distinguish bearer (claims = Null) from OIDC (claims = JWT object)
195/// principals.
196///
197/// `None` when no `AuthenticatedPrincipal` is on the request — the
198/// unauthenticated loopback deployment path, which the tenant-list
199/// handler treats as "all tenants visible" (same scope as the
200/// `solo tenants list` CLI). See `docs/dev-log/0119-tenants-list-impl.md`
201/// for the three-case visibility rule.
202pub struct MaybePrincipal(pub Option<AuthenticatedPrincipal>);
203
204impl<S> FromRequestParts<S> for MaybePrincipal
205where
206    S: Send + Sync,
207{
208    type Rejection = std::convert::Infallible;
209
210    async fn from_request_parts(
211        parts: &mut Parts,
212        _state: &S,
213    ) -> Result<Self, Self::Rejection> {
214        Ok(MaybePrincipal(
215            parts
216                .extensions
217                .get::<AuthenticatedPrincipal>()
218                .cloned(),
219        ))
220    }
221}
222
223/// Build the router with optional bearer-token auth (v0.7.x legacy shape).
224///
225/// When `bearer_token` is `Some(t)`, every request except `GET /health`
226/// + `GET /openapi.json` (unauthenticated probes / machine-readable spec)
227/// requires `Authorization: Bearer t`. v0.8.0 P3 routes this through the
228/// new `AuthValidator::Bearer` middleware so an `AuthenticatedPrincipal`
229/// is attached to every authenticated request (the `TenantExtractor`
230/// reads `principal.tenant_claim` ahead of the `X-Solo-Tenant` header).
231pub fn router_with_auth(state: SoloHttpState, bearer_token: Option<String>) -> Router {
232    let auth = bearer_token.map(|token| AuthConfig::Bearer { token });
233    router_with_auth_config(state, auth)
234}
235
236/// Build the router with a config-driven auth block (v0.8.0 P3+).
237///
238/// `auth = Some(AuthConfig::Bearer { token })` is equivalent to passing
239/// `Some(token)` to [`router_with_auth`]. `auth = Some(AuthConfig::Oidc { … })`
240/// installs the OIDC middleware (JWKS fetch + cache + sig + claim checks).
241/// `auth = None` runs unauthenticated — same `127.0.0.1` default as v0.7.x.
242///
243/// Public routes (`/health`, `/openapi.json`) are always exempt from
244/// auth — load balancers, uptime monitors, and codegen tools shouldn't
245/// need credentials.
246pub fn router_with_auth_config(state: SoloHttpState, auth: Option<AuthConfig>) -> Router {
247    let cors = build_cors_layer();
248    // Public, always-unauthenticated routes:
249    //   - GET /health: liveness probe (load balancers, uptime monitors).
250    //   - GET /openapi.json: machine-readable API description for client
251    //     codegen + browser-UI tooling (TypeScript / OpenAPI Generator,
252    //     curl-tools, etc.). The spec describes the API shape, not
253    //     secrets — fine to serve unauthenticated even on a LAN-bound
254    //     instance.
255    let public = Router::new()
256        .route("/health", get(|| async { "ok" }))
257        .route("/openapi.json", get(openapi_handler));
258
259    let authed = Router::new()
260        .route("/memory", post(remember_handler))
261        .route("/memory/search", post(recall_handler))
262        .route("/memory/consolidate", post(consolidate_handler))
263        .route("/memory/{id}", get(inspect_handler).delete(forget_handler))
264        .route("/backup", post(backup_handler))
265        // Path 1 derived-layer endpoints (v0.4.0+). GET-shaped because
266        // these are pure read-only queries; query-string params for
267        // simple filters keep them curl-friendly without a JSON body.
268        .route("/memory/themes", get(themes_handler))
269        .route("/memory/facts_about", get(facts_about_handler))
270        .route("/memory/contradictions", get(contradictions_handler))
271        // v0.5.0 Priority 3: drill into one cluster + abstraction +
272        // episodes. Two-segment path (`/memory/clusters/{id}`) so it
273        // does not shadow the single-segment `/memory/{id}` UUID
274        // inspect route.
275        .route(
276            "/memory/clusters/{cluster_id}",
277            get(inspect_cluster_handler),
278        )
279        // v0.7.0 P6: document operations. Two-segment paths
280        // (`/memory/documents/...`) so they don't shadow the
281        // single-segment `/memory/{id}` episode-inspect route. Order
282        // matters: register the literal `/memory/documents/search`
283        // ahead of `/memory/documents/{id}` so axum's matcher prefers
284        // the literal over the path parameter.
285        .route(
286            "/memory/documents/search",
287            post(search_docs_handler),
288        )
289        .route(
290            "/memory/documents",
291            post(ingest_document_handler).get(list_documents_handler),
292        )
293        .route(
294            "/memory/documents/{id}",
295            get(inspect_document_handler).delete(forget_document_handler),
296        )
297        // v0.9.x: graph drill-down for solo-web. Read-only neighbor
298        // expansion off any node in the memory graph. See
299        // `docs/dev-log/0105-solo-web-scoping.md` §4 + the impl dev log
300        // for the full `/v1/graph/*` family this is the first of.
301        .route("/v1/graph/expand", get(graph_expand_handler))
302        // v0.10.0: paginated catalog reads for solo-web's initial graph
303        // render. See `docs/dev-log/0114-graph-nodes-edges-impl.md`
304        // alongside the same scoping doc.
305        .route("/v1/graph/nodes", get(graph_nodes_handler))
306        .route("/v1/graph/edges", get(graph_edges_handler))
307        // v0.10.0: kind-discriminated full-record drill for solo-web's
308        // inspector panel. See `docs/dev-log/0115-graph-inspect-impl.md`.
309        .route("/v1/graph/inspect/{id}", get(graph_inspect_handler))
310        // v0.10.0: unified explicit + HNSW-semantic neighbors for solo-
311        // web's "show similar" overlay. See
312        // `docs/dev-log/0116-graph-neighbors-impl.md`.
313        .route("/v1/graph/neighbors/{id}", get(graph_neighbors_handler))
314        // v0.10.0: Server-Sent Events stream of graph-data invalidations
315        // for solo-web's live update story. The wire format is
316        // INVALIDATION-shaped (`{reason, tenant_id, ts_ms, kind}`) per
317        // scoping doc §3 Decision C — clients refetch the affected page
318        // on each event rather than receiving row payloads. See
319        // `docs/dev-log/0117-graph-stream-impl.md`.
320        .route("/v1/graph/stream", get(graph_stream_handler))
321        // v0.10.0: principal-scoped tenant list for solo-web's top-bar
322        // tenant picker. Read-only — admin CRUD (create/delete) remains
323        // CLI-only per ADR-0004 §"Admin operations". The visibility
324        // filter is principal-driven: no-auth + bearer principals see
325        // every active tenant; OIDC principals see only the tenant
326        // named by their `tenant_claim`. See
327        // `docs/dev-log/0119-tenants-list-impl.md` + scoping doc §3
328        // Decision F + §4 Route 6.
329        .route("/v1/tenants", get(tenants_list_handler))
330        .with_state(state.clone());
331
332    // v0.10.2: MCP-over-HTTP transport on /mcp. Lets one Solo process
333    // serve both `/v1/graph/*` (REST, for solo-web) and `/mcp`
334    // (JSON-RPC, for solo-jarvis) without the
335    // single-writer-per-data-dir lock dance. See
336    // `docs/dev-log/0129-v0.10.2-mcp-over-http-impl.md` for the spec.
337    // POST + GET share the same path; axum's `MethodRouter` muxes by
338    // HTTP method. OPTIONS is handled by the `CorsLayer` (already
339    // wired below) — we don't need an explicit handler.
340    //
341    // v0.11.0 P1: the route gets its own session middleware layer
342    // (`mcp_session_middleware`) that validates the `Mcp-Session-Id`
343    // request header against the per-process `SessionStore`. Expired
344    // / unknown sessions return 404 with a re-init instruction; the
345    // POST handler creates a new session on a request that arrived
346    // without the header and echoes the assigned id back via
347    // `Mcp-Session-Id` response header. The middleware lives on this
348    // sub-router (not the outer `authed`) so the rest of the API
349    // surface is unaffected — only `/mcp` carries session semantics.
350    let mcp_router: Router<SoloHttpState> = Router::new()
351        .route(
352            "/mcp",
353            post(mcp_http_post_handler).get(mcp_http_get_handler),
354        )
355        .layer(axum::middleware::from_fn_with_state(
356            state.mcp_sessions.clone(),
357            crate::mcp_session::mcp_session_middleware,
358        ));
359    let authed = authed.merge(mcp_router.with_state(state.clone()));
360
361    let authed = if let Some(cfg) = auth {
362        // v0.8.0 P3: dispatch via AuthValidator (bearer | OIDC), inserts
363        // AuthenticatedPrincipal into request extensions for the
364        // TenantExtractor + audit-log to read.
365        let validator = Arc::new(AuthValidator::from_config(
366            &cfg,
367            state.default_tenant.clone(),
368        ));
369        authed.layer(axum::middleware::from_fn_with_state(
370            validator,
371            crate::auth::middleware::auth_middleware,
372        ))
373    } else {
374        authed
375    };
376
377    public
378        .merge(authed)
379        .layer(cors)
380        .layer(TraceLayer::new_for_http())
381}
382
383/// Convenience wrapper: no auth (loopback-only deployments).
384pub fn router(state: SoloHttpState) -> Router {
385    router_with_auth_config(state, None)
386}
387
388fn build_cors_layer() -> CorsLayer {
389    // Permissive-localhost CORS: allow any localhost / 127.0.0.1 origin so
390    // browser-based UIs running on a different local port can call the API
391    // without preflight friction. We do NOT use `Any` because that would
392    // allow arbitrary remote origins to talk to our localhost server via
393    // a victim's browser. With bearer-token auth enabled the practical
394    // impact is reduced (the cross-origin attacker still can't supply
395    // the token), but principle of least privilege says refuse anyway.
396    //
397    // When the server is bound to a non-loopback address (auth required),
398    // the same CORS predicate keeps localhost-only browser clients —
399    // suitable for trusted-LAN deployments where the LAN client itself
400    // tunnels through ssh/wireguard back to localhost. Wider CORS for
401    // genuine cross-origin browser use is a future config knob.
402    CorsLayer::new()
403        .allow_origin(AllowOrigin::predicate(|origin: &HeaderValue, _req| {
404            origin
405                .to_str()
406                .map(is_localhost_origin)
407                .unwrap_or(false)
408        }))
409        .allow_methods([Method::GET, Method::POST, Method::DELETE, Method::OPTIONS])
410        .allow_headers([
411            axum::http::header::CONTENT_TYPE,
412            axum::http::header::AUTHORIZATION,
413            // Custom Solo headers — browsers preflight-check these and
414            // refuse the actual request if they're not in the allow list.
415            // Without `x-solo-tenant` solo-web's browser fetches all fail
416            // with "Failed to fetch" (CORS preflight rejection).
417            axum::http::HeaderName::from_static("x-solo-tenant"),
418            // v0.10.2: `Mcp-Session-Id` is part of the MCP Streamable
419            // HTTP transport spec (sessions, resumable streams). v0.11.0
420            // P1/P2 implement the real session affinity + resumable GET
421            // stream behind this header; the allow-list entry was
422            // pre-wired in v0.10.2 so browser-based MCP clients that
423            // preflight for it (per the spec) succeed instead of
424            // failing with a CORS error before the first request even
425            // lands.
426            axum::http::HeaderName::from_static("mcp-session-id"),
427            // v0.11.0 P2: `Last-Event-ID` is the SSE-spec header carrying
428            // the client's last-seen event id on reconnect. The
429            // resumable `GET /mcp` handler reads it and replays the
430            // missed events from the per-session ring buffer
431            // (Decision E). Browsers preflight any non-CORS-safelisted
432            // request header; without this entry the preflight fails
433            // before the actual reconnect lands.
434            axum::http::HeaderName::from_static(crate::mcp_session::MCP_LAST_EVENT_ID_HEADER),
435        ])
436}
437
/// Returns `true` only when `origin` is a loopback web origin:
/// `http(s)://localhost[:port]`, `http(s)://127.0.0.1[:port]`, or
/// `http(s)://[::1][:port]` (IPv6 loopback).
///
/// The check is strict:
/// * the scheme must be exactly `http://` or `https://`;
/// * the host must be exactly one of the three loopback spellings, so
///   look-alikes such as `127.0.0.1.nip.io` are rejected;
/// * a port, when present, must be a non-empty run of ASCII digits, so
///   userinfo-style values such as `http://localhost:80@evil.example`
///   are rejected instead of being truncated at the last `:`;
/// * for a bracketed IPv6 host, only an optional `:port` may follow the
///   closing `]` (`http://[::1]evil.example` is rejected).
///
/// A path component should not appear on an `Origin` header, but
/// anything after the first `/` is ignored defensively.
fn is_localhost_origin(origin: &str) -> bool {
    let Some(rest) = origin
        .strip_prefix("http://")
        .or_else(|| origin.strip_prefix("https://"))
    else {
        return false;
    };

    // Keep only the authority (host[:port]); drop any path.
    let authority = rest.split('/').next().unwrap_or(rest);

    let (host, port) = if authority.starts_with('[') {
        // Bracketed IPv6 literal: the host runs through the matching
        // `]`, and the remainder must be empty or `:port`.
        let Some(close) = authority.find(']') else {
            return false;
        };
        let (host, tail) = authority.split_at(close + 1);
        match tail.strip_prefix(':') {
            Some(port) => (host, Some(port)),
            None if tail.is_empty() => (host, None),
            None => return false,
        }
    } else {
        match authority.rsplit_once(':') {
            Some((host, port)) => (host, Some(port)),
            None => (authority, None),
        }
    };

    // Digits only. Integer parsing is deliberately avoided because it
    // would also accept a leading `+`.
    let port_is_valid =
        port.map_or(true, |p| !p.is_empty() && p.bytes().all(|b| b.is_ascii_digit()));

    port_is_valid && matches!(host, "localhost" | "127.0.0.1" | "[::1]")
}
467
468/// Bind + serve (v0.7.x legacy shape). `shutdown` is awaited inside
469/// axum's `with_graceful_shutdown`; resolving it triggers a clean drain.
470/// `bearer_token = None` runs unauthenticated (loopback default);
471/// `Some(t)` requires `Authorization: Bearer t` on every request
472/// except `GET /health` + `GET /openapi.json`.
473pub async fn serve_http(
474    addr: SocketAddr,
475    state: SoloHttpState,
476    bearer_token: Option<String>,
477    shutdown: impl std::future::Future<Output = ()> + Send + 'static,
478) -> std::io::Result<()> {
479    let auth = bearer_token.map(|token| AuthConfig::Bearer { token });
480    serve_http_with_auth_config(addr, state, auth, shutdown).await
481}
482
483/// Bind + serve with a config-driven auth block (v0.8.0 P3+).
484/// `auth = None` runs unauthenticated. See [`router_with_auth_config`]
485/// for the auth-mode semantics.
486pub async fn serve_http_with_auth_config(
487    addr: SocketAddr,
488    state: SoloHttpState,
489    auth: Option<AuthConfig>,
490    shutdown: impl std::future::Future<Output = ()> + Send + 'static,
491) -> std::io::Result<()> {
492    let auth_kind = match &auth {
493        Some(AuthConfig::Bearer { .. }) => "bearer",
494        Some(AuthConfig::Oidc { .. }) => "oidc",
495        None => "none",
496    };
497    let app = router_with_auth_config(state, auth);
498    let listener = tokio::net::TcpListener::bind(addr).await?;
499    tracing::info!(%addr, auth = auth_kind, "solo http: listening");
500    axum::serve(listener, app)
501        .with_graceful_shutdown(shutdown)
502        .await
503}
504
505// ---------------------------------------------------------------------------
506// OpenAPI 3.1 spec
507// ---------------------------------------------------------------------------
508
509/// Serve the hand-crafted OpenAPI 3.1 spec at `GET /openapi.json`.
510///
511/// We keep the spec hand-written (rather than deriving via `utoipa`)
512/// for v0.1: 4 simple endpoints, types live across crate boundaries
513/// (`solo_query::RecallResult`, `solo_query::EpisodeRecord`), and a
514/// `utoipa` retrofit would touch every crate. Hand-crafted is one
515/// JSON literal in this file; a smoke test in `handler_tests` parses
516/// the response and asserts the expected paths + components are
517/// present, so drift between spec and code is caught at PR time.
518async fn openapi_handler() -> Json<serde_json::Value> {
519    Json(openapi_spec())
520}
521
522/// Build the OpenAPI 3.1 spec describing Solo's HTTP transport.
523/// Public so the smoke test + future client-codegen tooling can
524/// produce the same document without spinning up the server.
525pub fn openapi_spec() -> serde_json::Value {
526    serde_json::json!({
527        "openapi": "3.1.0",
528        "info": {
529            "title": "Solo HTTP API",
530            "description":
531                "Local-first personal memory daemon. The HTTP transport \
532                 mirrors the four MCP tools (memory_remember / recall / \
533                 inspect / forget). Default deployment is loopback-only \
534                 (127.0.0.1); LAN-bound deployments require a bearer \
535                 token via `solo http-serve --bind <ip> --bearer-token-file <path>`.",
536            "version": env!("CARGO_PKG_VERSION"),
537            "license": { "name": "Apache-2.0" }
538        },
539        "servers": [
540            { "url": "http://127.0.0.1:7437", "description": "Default loopback (replace port with your --http-port)" }
541        ],
542        "components": {
543            "securitySchemes": {
544                "bearerAuth": {
545                    "type": "http",
546                    "scheme": "bearer",
547                    "description":
548                        "Bearer-token auth. Required only on LAN-bound deployments \
549                         (`solo http-serve --bind <non-loopback> --bearer-token-file <path>`); \
550                         the default `127.0.0.1` deployment is unauthenticated. \
551                         `GET /health` and `GET /openapi.json` are exempt from auth even \
552                         on bearer-protected instances."
553                }
554            },
555            "schemas": {
556                "RememberRequest": {
557                    "type": "object",
558                    "required": ["content"],
559                    "properties": {
560                        "content": { "type": "string", "minLength": 1, "description": "Episode content to embed + store." },
561                        "source_type": { "type": "string", "description": "Free-form source tag (e.g. `user_message`, `tool_output`). Defaults to `user_message`." },
562                        "source_id": { "type": "string", "description": "Optional upstream ID for traceability." }
563                    },
564                    "additionalProperties": false
565                },
566                "RememberResponse": {
567                    "type": "object",
568                    "required": ["memory_id"],
569                    "properties": {
570                        "memory_id": { "type": "string", "format": "uuid", "description": "UUID v7 assigned to the new episode." }
571                    }
572                },
573                "RecallRequest": {
574                    "type": "object",
575                    "required": ["query"],
576                    "properties": {
577                        "query": { "type": "string", "minLength": 1, "description": "Natural-language query; embedded by the same model as stored episodes." },
578                        "limit": { "type": "integer", "minimum": 1, "maximum": 50, "default": 5, "description": "Max number of hits to return." }
579                    },
580                    "additionalProperties": false
581                },
582                "RecallResult": {
583                    "type": "object",
584                    "description":
585                        "Recall response. Fields are stable across v0.1 but not exhaustively documented here — \
586                         see `solo_query::RecallResult` in the source for the canonical shape. \
587                         Treat as a forward-compatible JSON object.",
588                    "additionalProperties": true
589                },
590                "ConsolidationScope": {
591                    "type": "object",
592                    "description": "Filter + flags for consolidation. All fields optional; empty body = unbounded defaults.",
593                    "properties": {
594                        "window_days": { "type": "integer", "nullable": true, "description": "Restrict to memories with ts_ms >= now - window_days * 86400000. Null/omitted = unbounded." },
595                        "force_merge": { "type": "boolean", "default": false, "description": "Run the existing-vs-existing merge + abstraction-regen passes even with zero unclustered candidates. Drift catch-up on quiet corpora. Added in 0.3.1." }
596                    },
597                    "additionalProperties": false
598                },
599                "ConsolidationReport": {
600                    "type": "object",
601                    "required": [
602                        "episodes_seen", "clusters_built", "clusters_merged",
603                        "clusters_absorbed", "existing_clusters_merged",
604                        "episodes_clustered", "abstractions_built",
605                        "abstractions_regenerated", "triples_built",
606                        "contradictions_found"
607                    ],
608                    "properties": {
609                        "episodes_seen":             { "type": "integer", "minimum": 0 },
610                        "clusters_built":            { "type": "integer", "minimum": 0, "description": "Brand-new clusters that survived to be persisted (post in-run-merge, post cross-run-absorb)." },
611                        "clusters_merged":           { "type": "integer", "minimum": 0, "description": "In-run merge: clusters absorbed into a sibling within this consolidate run (cross-UTC-bucket case). Counts losers." },
612                        "clusters_absorbed":         { "type": "integer", "minimum": 0, "description": "Cross-run absorb: freshly-built clusters folded into a pre-existing DB cluster with a similar centroid. Counts new-side clusters." },
613                        "existing_clusters_merged":  { "type": "integer", "minimum": 0, "description": "Existing-vs-existing merge: pre-existing DB clusters that drifted toward each other and now coalesce. Counts losers." },
614                        "episodes_clustered":        { "type": "integer", "minimum": 0 },
615                        "abstractions_built":        { "type": "integer", "minimum": 0, "description": "Fresh abstractions persisted for newly-built clusters. 0 when no LlmClient is wired." },
616                        "abstractions_regenerated":  { "type": "integer", "minimum": 0, "description": "Existing clusters whose stale abstractions were dropped and rebuilt because absorb or existing-merge changed their episode set. 0 without an LlmClient." },
617                        "triples_built":             { "type": "integer", "minimum": 0 },
618                        "contradictions_found":      { "type": "integer", "minimum": 0 }
619                    }
620                },
621                "EpisodeRecord": {
622                    "type": "object",
623                    "description":
624                        "Inspect response: full episode record. Fields are stable across v0.1 but not \
625                         exhaustively documented here — see `solo_query::EpisodeRecord` in the source. \
626                         Treat as a forward-compatible JSON object.",
627                    "additionalProperties": true
628                },
629                "ThemeHit": {
630                    "type": "object",
631                    "description":
632                        "One cluster + its (optional) abstraction. Returned by GET /memory/themes. \
633                         See `solo_query::ThemeHit` for the canonical shape: cluster_id, \
634                         abstraction_id?, abstraction_text?, episode_count, coherence, created_at_ms.",
635                    "additionalProperties": true
636                },
637                "FactHit": {
638                    "type": "object",
639                    "description":
640                        "One Steward-extracted SPO triple. Returned by GET /memory/facts_about. \
641                         See `solo_query::FactHit` for fields: triple_id, subject_id, predicate, \
642                         object_id, object_kind, valid_from_ms, valid_to_ms?, confidence, cluster_id?.",
643                    "additionalProperties": true
644                },
645                "ContradictionHit": {
646                    "type": "object",
647                    "description":
648                        "One Steward-flagged contradiction with each side's triple LEFT JOIN'd in. \
649                         Returned by GET /memory/contradictions. See `solo_query::ContradictionHit`: \
650                         a_id, b_id, kind, explanation, detected_at_ms, a_triple?, b_triple?.",
651                    "additionalProperties": true
652                },
653                "ClusterRecord": {
654                    "type": "object",
655                    "description":
656                        "Snapshot of one cluster — its row, optional abstraction, and source episodes \
657                         (content truncated to 200 chars unless ?full_content=true). Returned by \
658                         GET /memory/clusters/{cluster_id}. See `solo_query::ClusterRecord`.",
659                    "additionalProperties": true
660                },
661                "IngestDocumentRequest": {
662                    "type": "object",
663                    "required": ["path"],
664                    "properties": {
665                        "path": {
666                            "type": "string",
667                            "minLength": 1,
668                            "description":
669                                "Server-side absolute path to the file to ingest. The file must be \
670                                 readable by the Solo process. Supported formats: plaintext / \
671                                 markdown / code, HTML, PDF."
672                        }
673                    },
674                    "additionalProperties": false
675                },
676                "IngestReport": {
677                    "type": "object",
678                    "description":
679                        "Returned by POST /memory/documents. Reports the document id assigned, \
680                         the number of chunks persisted + embedded, the total byte size, and a \
681                         `deduped` flag (true when the same content_hash was already present and \
682                         the existing doc_id was returned unchanged). See `solo_storage::IngestReport`.",
683                    "required": ["doc_id", "chunks_persisted", "bytes_ingested", "deduped"],
684                    "properties": {
685                        "doc_id":            { "type": "string", "format": "uuid" },
686                        "chunks_persisted":  { "type": "integer", "minimum": 0 },
687                        "bytes_ingested":    { "type": "integer", "minimum": 0, "format": "int64" },
688                        "deduped":           { "type": "boolean" }
689                    },
690                    "additionalProperties": false
691                },
692                "ForgetDocumentReport": {
693                    "type": "object",
694                    "description":
695                        "Returned by DELETE /memory/documents/{id}. Reports the doc_id soft-deleted \
696                         and how many chunk rowids were tombstoned in the HNSW index. The chunk rows \
697                         themselves survive in SQL for forensic value. See `solo_storage::ForgetDocumentReport`.",
698                    "required": ["doc_id", "chunks_tombstoned"],
699                    "properties": {
700                        "doc_id":             { "type": "string", "format": "uuid" },
701                        "chunks_tombstoned":  { "type": "integer", "minimum": 0 }
702                    },
703                    "additionalProperties": false
704                },
705                "SearchDocsRequest": {
706                    "type": "object",
707                    "required": ["query"],
708                    "properties": {
709                        "query": { "type": "string", "minLength": 1 },
710                        "limit": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 }
711                    },
712                    "additionalProperties": false
713                },
714                "DocSearchHit": {
715                    "type": "object",
716                    "description":
717                        "One chunk hit + parent-doc context. Fields per `solo_query::DocSearchHit`: \
718                         chunk_id, doc_id, doc_title?, doc_source?, doc_mime_type?, chunk_index, \
719                         content, cos_distance, start_offset, end_offset.",
720                    "additionalProperties": true
721                },
722                "DocumentInspectResult": {
723                    "type": "object",
724                    "description":
725                        "Returned by GET /memory/documents/{id}. A `document` record (full metadata) \
726                         plus an ordered list of chunk summaries (each preview truncated to 200 \
727                         chars). See `solo_query::DocumentInspectResult`.",
728                    "additionalProperties": true
729                },
730                "DocumentSummary": {
731                    "type": "object",
732                    "description":
733                        "One row from GET /memory/documents. Fields per `solo_query::DocumentSummary`: \
734                         doc_id, title?, source?, mime_type?, ingested_at_ms, chunk_count, status.",
735                    "additionalProperties": true
736                },
737                "ApiError": {
738                    "type": "object",
739                    "required": ["error", "status"],
740                    "properties": {
741                        "error": { "type": "string" },
742                        "status": { "type": "integer", "minimum": 400, "maximum": 599 }
743                    }
744                }
745            }
746        },
747        "paths": {
748            "/health": {
749                "get": {
750                    "summary": "Liveness probe",
751                    "description": "Returns plain text `ok`. Always unauthenticated.",
752                    "responses": {
753                        "200": {
754                            "description": "Server is up.",
755                            "content": { "text/plain": { "schema": { "type": "string", "example": "ok" } } }
756                        }
757                    }
758                }
759            },
760            "/openapi.json": {
761                "get": {
762                    "summary": "Self-describing OpenAPI 3.1 spec",
763                    "description": "Returns this document. Always unauthenticated.",
764                    "responses": {
765                        "200": {
766                            "description": "OpenAPI 3.1 document.",
767                            "content": { "application/json": { "schema": { "type": "object" } } }
768                        }
769                    }
770                }
771            },
772            "/memory": {
773                "post": {
774                    "summary": "Remember (store an episode)",
775                    "description": "Equivalent to MCP tool `memory_remember`.",
776                    "security": [{ "bearerAuth": [] }, {}],
777                    "requestBody": {
778                        "required": true,
779                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RememberRequest" } } }
780                    },
781                    "responses": {
782                        "200": {
783                            "description": "Memory stored; returns the new MemoryId.",
784                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RememberResponse" } } }
785                        },
786                        "400": { "description": "Bad request (e.g. empty content).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
787                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
788                    }
789                }
790            },
791            "/memory/search": {
792                "post": {
793                    "summary": "Recall (vector search)",
794                    "description": "Equivalent to MCP tool `memory_recall`. Embeds the query, runs HNSW search, returns the top-K hits in cosine-distance order.",
795                    "security": [{ "bearerAuth": [] }, {}],
796                    "requestBody": {
797                        "required": true,
798                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RecallRequest" } } }
799                    },
800                    "responses": {
801                        "200": {
802                            "description": "Search results.",
803                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RecallResult" } } }
804                        },
805                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
806                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
807                    }
808                }
809            },
810            "/memory/consolidate": {
811                "post": {
812                    "summary": "Run a consolidation pass (clustering + abstraction)",
813                    "description":
814                        "Idempotent. Triggers the SWS-equivalent clustering pass; if a `Steward` LLM is wired \
815                         on the server, also runs the REM-equivalent abstraction pass that populates \
816                         `semantic_abstractions` and `triples`. Empty request body = default scope (unbounded \
817                         window). Equivalent to the `solo consolidate` CLI.",
818                    "security": [{ "bearerAuth": [] }, {}],
819                    "requestBody": {
820                        "required": false,
821                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConsolidationScope" } } }
822                    },
823                    "responses": {
824                        "200": {
825                            "description": "Consolidation complete; report counts the work done.",
826                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConsolidationReport" } } }
827                        },
828                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
829                    }
830                }
831            },
832            "/backup": {
833                "post": {
834                    "summary": "Online encrypted backup",
835                    "description":
836                        "Run an online SQLCipher backup of the live data dir to a server-side path. \
837                         The destination file is encrypted with the same Argon2id-derived raw key as \
838                         the source, so it restores under the same passphrase + a copy of the source's \
839                         `solo.config.toml`. Hot — the backup runs against the writer's existing \
840                         connection without taking the lockfile, so the daemon keeps serving reads + \
841                         writes during the operation. v0.3.2+.",
842                    "security": [{ "bearerAuth": [] }, {}],
843                    "requestBody": {
844                        "required": true,
845                        "content": { "application/json": { "schema": {
846                            "type": "object",
847                            "properties": {
848                                "to": { "type": "string", "description": "Server-side absolute path for the backup file." },
849                                "force": { "type": "boolean", "description": "Overwrite an existing destination file. Default false.", "default": false }
850                            },
851                            "required": ["to"]
852                        } } }
853                    },
854                    "responses": {
855                        "200": {
856                            "description": "Backup complete; reports the destination path + elapsed milliseconds.",
857                            "content": { "application/json": { "schema": {
858                                "type": "object",
859                                "properties": {
860                                    "path": { "type": "string" },
861                                    "elapsed_ms": { "type": "integer", "format": "int64" }
862                                }
863                            } } }
864                        },
865                        "400": { "description": "Destination invalid, exists without force, or its parent doesn't exist." },
866                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
867                        "500": { "description": "Backup failed (disk full, permission denied, etc.)." }
868                    }
869                }
870            },
871            "/memory/{id}": {
872                "get": {
873                    "summary": "Inspect a memory by ID",
874                    "description": "Equivalent to MCP tool `memory_inspect`.",
875                    "security": [{ "bearerAuth": [] }, {}],
876                    "parameters": [{
877                        "name": "id",
878                        "in": "path",
879                        "required": true,
880                        "schema": { "type": "string", "format": "uuid" },
881                        "description": "MemoryId (UUID v7)."
882                    }],
883                    "responses": {
884                        "200": {
885                            "description": "Episode record.",
886                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/EpisodeRecord" } } }
887                        },
888                        "400": { "description": "Malformed ID.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
889                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
890                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
891                    }
892                },
893                "delete": {
894                    "summary": "Forget (soft-delete) a memory by ID",
895                    "description":
896                        "Equivalent to MCP tool `memory_forget`. Soft-delete: flips `episodes.status = 'forgotten'` \
897                         and tombstones the HNSW vector. The row + embedding are preserved for forensics; \
898                         re-running `solo reembed` after this does NOT restore visibility.",
899                    "security": [{ "bearerAuth": [] }, {}],
900                    "parameters": [
901                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } },
902                        { "name": "reason", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Free-form reason logged via tracing (not yet persisted to the DB)." }
903                    ],
904                    "responses": {
905                        "204": { "description": "Forgotten (or already forgotten — idempotent)." },
906                        "400": { "description": "Malformed ID.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
907                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
908                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
909                    }
910                }
911            },
912            "/memory/themes": {
913                "get": {
914                    "summary": "List recent cluster themes",
915                    "description":
916                        "Equivalent to MCP tool `memory_themes`. List cluster abstractions ordered by \
917                         most-recent first. Use to surface 'what has the user been thinking about lately' \
918                         without paging through individual episodes. v0.4.0+.",
919                    "security": [{ "bearerAuth": [] }, {}],
920                    "parameters": [
921                        { "name": "window_days", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1 }, "description": "Optional time window. Omit for unfiltered (all-time, most-recent first)." },
922                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
923                    ],
924                    "responses": {
925                        "200": {
926                            "description": "Array of ThemeHits (possibly empty).",
927                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ThemeHit" } } } }
928                        },
929                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
930                    }
931                }
932            },
933            "/memory/facts_about": {
934                "get": {
935                    "summary": "Query the SPO knowledge graph by subject",
936                    "description":
937                        "Equivalent to MCP tool `memory_facts_about`. Query Steward-extracted triples by \
938                         subject + optional predicate + optional time window. Subject is required \
939                         (predicate-only scans not supported). Pass `include_as_object=true` (v0.5.1+) \
940                         to also surface rows where `subject` appears as the object. v0.4.0+.",
941                    "security": [{ "bearerAuth": [] }, {}],
942                    "parameters": [
943                        { "name": "subject", "in": "query", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Subject id to query (e.g. `Sam`)." },
944                        { "name": "predicate", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Optional predicate filter (e.g. `works_at`)." },
945                        { "name": "since_ms", "in": "query", "required": false, "schema": { "type": "integer" }, "description": "Optional valid_from_ms lower bound (epoch ms)." },
946                        { "name": "until_ms", "in": "query", "required": false, "schema": { "type": "integer" }, "description": "Optional valid_to_ms upper bound (epoch ms). NULL upper bounds (still-valid facts) pass through." },
947                        { "name": "include_as_object", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "If true, also match rows where `subject` appears as the object (e.g. surface 'Sam pushes back on PRs about Maya' under subject='Maya'). Default false. v0.5.1+." },
948                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
949                    ],
950                    "responses": {
951                        "200": {
952                            "description": "Array of FactHits (possibly empty).",
953                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/FactHit" } } } }
954                        },
955                        "400": { "description": "Bad request (e.g. empty subject).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
956                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
957                    }
958                }
959            },
960            "/memory/contradictions": {
961                "get": {
962                    "summary": "List Steward-flagged contradictions",
963                    "description":
964                        "Equivalent to MCP tool `memory_contradictions`. Each result includes both \
965                         sides' triple SPO via LEFT JOIN for context. v0.4.0+.",
966                    "security": [{ "bearerAuth": [] }, {}],
967                    "parameters": [
968                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
969                    ],
970                    "responses": {
971                        "200": {
972                            "description": "Array of ContradictionHits (possibly empty).",
973                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ContradictionHit" } } } }
974                        },
975                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
976                    }
977                }
978            },
979            "/memory/clusters/{cluster_id}": {
980                "get": {
981                    "summary": "Inspect a single cluster",
982                    "description":
983                        "Equivalent to MCP tool `memory_inspect_cluster`. Returns the cluster row, \
984                         its (optional) abstraction, and its source episodes. By default each \
985                         episode's `content` is truncated to 200 chars with a trailing `…`. Pass \
986                         `?full_content=true` to get verbatim episode content. v0.5.0+.",
987                    "security": [{ "bearerAuth": [] }, {}],
988                    "parameters": [
989                        { "name": "cluster_id", "in": "path", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Cluster id (from a previous GET /memory/themes response)." },
990                        { "name": "full_content", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "If true, return episode content verbatim. Default false (truncate to 200 chars + ellipsis)." }
991                    ],
992                    "responses": {
993                        "200": {
994                            "description": "Cluster snapshot.",
995                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ClusterRecord" } } }
996                        },
997                        "400": { "description": "Bad request (e.g. empty cluster_id).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
998                        "404": { "description": "No such cluster.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
999                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1000                    }
1001                }
1002            },
1003            "/memory/documents": {
1004                "post": {
1005                    "summary": "Ingest a document",
1006                    "description":
1007                        "Equivalent to MCP tool `memory_ingest_document`. Reads the file at the \
1008                         supplied server-side path, parses + chunks + embeds, and persists under \
1009                         `documents` + `document_chunks`. Returns the new doc_id, chunk count, and \
1010                         a `deduped` flag (true when an existing document with the same content_hash \
1011                         was returned without re-embedding). v0.7.0+.",
1012                    "security": [{ "bearerAuth": [] }, {}],
1013                    "requestBody": {
1014                        "required": true,
1015                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/IngestDocumentRequest" } } }
1016                    },
1017                    "responses": {
1018                        "200": {
1019                            "description": "Document ingested (or deduplicated).",
1020                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/IngestReport" } } }
1021                        },
1022                        "400": { "description": "Bad request (e.g. empty path, file unreadable, parse error).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1023                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1024                    }
1025                },
1026                "get": {
1027                    "summary": "List ingested documents (paginated)",
1028                    "description":
1029                        "Equivalent to MCP tool `memory_list_documents`. Returns a paginated index, \
1030                         newest first. Forgotten documents are hidden by default; pass \
1031                         `?include_forgotten=true` to see them too. v0.7.0+.",
1032                    "security": [{ "bearerAuth": [] }, {}],
1033                    "parameters": [
1034                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 20 } },
1035                        { "name": "offset", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 0, "default": 0 } },
1036                        { "name": "include_forgotten", "in": "query", "required": false, "schema": { "type": "boolean", "default": false } }
1037                    ],
1038                    "responses": {
1039                        "200": {
1040                            "description": "Array of DocumentSummary (possibly empty).",
1041                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/DocumentSummary" } } } }
1042                        },
1043                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1044                    }
1045                }
1046            },
1047            "/memory/documents/search": {
1048                "post": {
1049                    "summary": "Vector search across document chunks",
1050                    "description":
1051                        "Equivalent to MCP tool `memory_search_docs`. Embeds the query and returns \
1052                         up to `limit` matching chunks, best match first, each annotated with the \
1053                         parent document's title + source path. Forgotten documents are excluded. \
1054                         v0.7.0+.",
1055                    "security": [{ "bearerAuth": [] }, {}],
1056                    "requestBody": {
1057                        "required": true,
1058                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SearchDocsRequest" } } }
1059                    },
1060                    "responses": {
1061                        "200": {
1062                            "description": "Array of DocSearchHits (possibly empty).",
1063                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/DocSearchHit" } } } }
1064                        },
1065                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1066                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1067                    }
1068                }
1069            },
1070            "/memory/documents/{id}": {
1071                "get": {
1072                    "summary": "Inspect one document",
1073                    "description":
1074                        "Equivalent to MCP tool `memory_inspect_document`. Returns the document's \
1075                         metadata plus a preview of every chunk (truncated to 200 chars). v0.7.0+.",
1076                    "security": [{ "bearerAuth": [] }, {}],
1077                    "parameters": [
1078                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "DocumentId (UUID v7)." }
1079                    ],
1080                    "responses": {
1081                        "200": {
1082                            "description": "Document inspection result.",
1083                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/DocumentInspectResult" } } }
1084                        },
1085                        "400": { "description": "Malformed id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1086                        "404": { "description": "No such document.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1087                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1088                    }
1089                },
1090                "delete": {
1091                    "summary": "Forget (soft-delete) one document",
1092                    "description":
1093                        "Equivalent to MCP tool `memory_forget_document`. Flips `documents.status` \
1094                         to `forgotten` and tombstones every chunk's HNSW rowid. The chunk rows \
1095                         survive in SQL for forensic value. v0.7.0+.",
1096                    "security": [{ "bearerAuth": [] }, {}],
1097                    "parameters": [
1098                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } }
1099                    ],
1100                    "responses": {
1101                        "200": {
1102                            "description": "Document soft-deleted; report counts chunks tombstoned.",
1103                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ForgetDocumentReport" } } }
1104                        },
1105                        "400": { "description": "Malformed id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1106                        "404": { "description": "No such document.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1107                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1108                    }
1109                }
1110            }
1111        }
1112    })
1113}
1114
1115// ---------------------------------------------------------------------------
1116// Handlers
1117// ---------------------------------------------------------------------------
1118
/// JSON request body for `POST /memory` (remember).
#[derive(Debug, Deserialize)]
struct RememberBody {
    /// Text to store. `remember_handler` strips trailing whitespace and
    /// rejects the request when nothing remains.
    content: String,
    /// Optional origin label; `remember_handler` substitutes
    /// `"user_message"` when it is absent.
    #[serde(default)]
    source_type: Option<String>,
    /// Optional caller-supplied identifier, copied onto the episode as-is.
    #[serde(default)]
    source_id: Option<String>,
}
1127
/// JSON response body for `POST /memory`.
#[derive(Debug, Serialize)]
struct RememberResponse {
    /// String form of the id returned by the tenant's write path.
    memory_id: String,
}
1132
1133async fn remember_handler(
1134    TenantExtractor(tenant): TenantExtractor,
1135    AuditPrincipal(principal): AuditPrincipal,
1136    Json(body): Json<RememberBody>,
1137) -> Result<Json<RememberResponse>, ApiError> {
1138    let content = body.content.trim_end().to_string();
1139    if content.is_empty() {
1140        return Err(ApiError::bad_request("content must not be empty"));
1141    }
1142    let embedding = tenant.embedder().embed(&content).await.map_err(ApiError::from)?;
1143    let episode = Episode {
1144        memory_id: MemoryId::new(),
1145        ts_ms: chrono::Utc::now().timestamp_millis(),
1146        source_type: body.source_type.unwrap_or_else(|| "user_message".into()),
1147        source_id: body.source_id,
1148        content,
1149        encoding_context: EncodingContext::default(),
1150        provenance: None,
1151        confidence: Confidence::new(0.9).unwrap(),
1152        strength: 0.5,
1153        salience: 0.5,
1154        tier: Tier::Hot,
1155    };
1156    let mid = tenant
1157        .write()
1158        .remember_as(principal, episode, embedding)
1159        .await
1160        .map_err(ApiError::from)?;
1161    Ok(Json(RememberResponse {
1162        memory_id: mid.to_string(),
1163    }))
1164}
1165
/// JSON request body for `POST /memory/search` (recall).
#[derive(Debug, Deserialize)]
struct RecallBody {
    /// Query text, forwarded unmodified to `solo_query::run_recall`.
    query: String,
    /// Requested number of results; filled in by `default_limit` when the
    /// field is omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1172
/// Serde fallback used by the request structs in this file whose `limit`
/// field is declared with `#[serde(default = "default_limit")]`.
fn default_limit() -> usize {
    const FALLBACK_LIMIT: usize = 5;
    FALLBACK_LIMIT
}
1176
1177async fn recall_handler(
1178    TenantExtractor(tenant): TenantExtractor,
1179    AuditPrincipal(principal): AuditPrincipal,
1180    Json(body): Json<RecallBody>,
1181) -> Result<Json<solo_query::RecallResult>, ApiError> {
1182    // solo_query::run_recall handles empty-query rejection (returns
1183    // InvalidInput → ApiError::bad_request(400)) and clamps limit
1184    // upstream of the embedder call.
1185    let result = solo_query::run_recall(tenant.as_ref(), principal, &body.query, body.limit)
1186        .await
1187        .map_err(ApiError::from)?;
1188    Ok(Json(result))
1189}
1190
1191async fn inspect_handler(
1192    TenantExtractor(tenant): TenantExtractor,
1193    AuditPrincipal(principal): AuditPrincipal,
1194    Path(id): Path<String>,
1195) -> Result<Json<solo_query::EpisodeRecord>, ApiError> {
1196    let mid = MemoryId::from_str(&id)
1197        .map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1198    let row = solo_query::inspect_one(tenant.read(), tenant.audit(), principal, mid)
1199        .await
1200        .map_err(ApiError::from)?;
1201    Ok(Json(row))
1202}
1203
1204// Path 1 derived-layer handlers (v0.4.0+). All three are GET-shaped:
1205// pure read-only queries against the Steward's outputs, query-string
1206// params for simple filters. Each handler delegates to a single
1207// solo_query::derived pipeline and returns the result Vec as JSON.
1208// Empty derived layer → 200 with `[]` body (parseable JSON array).
1209
/// Query-string parameters for `GET /memory/themes`.
#[derive(Debug, Deserialize)]
struct ThemesQuery {
    /// Optional `window_days=N` filter; passed to `solo_query::themes`
    /// unchanged (`None` when the parameter is omitted).
    #[serde(default)]
    window_days: Option<i64>,
    /// Requested number of hits; filled in by `default_limit` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1217
1218async fn themes_handler(
1219    TenantExtractor(tenant): TenantExtractor,
1220    AuditPrincipal(principal): AuditPrincipal,
1221    Query(q): Query<ThemesQuery>,
1222) -> Result<Json<Vec<solo_query::ThemeHit>>, ApiError> {
1223    let hits = solo_query::themes(
1224        tenant.read(),
1225        tenant.audit(),
1226        principal,
1227        q.window_days,
1228        q.limit,
1229    )
1230    .await
1231    .map_err(ApiError::from)?;
1232    Ok(Json(hits))
1233}
1234
/// Query-string parameters for `GET /memory/facts_about`.
#[derive(Debug, Deserialize)]
struct FactsAboutQuery {
    /// Required subject to look up; `facts_about_handler` rejects values
    /// that are empty after trimming.
    subject: String,
    /// Optional predicate filter, forwarded as `Option<&str>`.
    #[serde(default)]
    predicate: Option<String>,
    /// Optional `since_ms` bound, forwarded unchanged.
    #[serde(default)]
    since_ms: Option<i64>,
    /// Optional `until_ms` bound, forwarded unchanged.
    #[serde(default)]
    until_ms: Option<i64>,
    /// v0.5.1 Priority 8 — widen the query to also match rows where
    /// `subject` appears as the object. Default `false`.
    #[serde(default)]
    include_as_object: bool,
    /// Requested number of hits; filled in by `default_limit` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1251
1252async fn facts_about_handler(
1253    State(s): State<SoloHttpState>,
1254    TenantExtractor(tenant): TenantExtractor,
1255    AuditPrincipal(principal): AuditPrincipal,
1256    Query(q): Query<FactsAboutQuery>,
1257) -> Result<Json<Vec<solo_query::FactHit>>, ApiError> {
1258    if q.subject.trim().is_empty() {
1259        return Err(ApiError::bad_request("subject must not be empty"));
1260    }
1261    let hits = solo_query::facts_about(
1262        tenant.read(),
1263        tenant.audit(),
1264        principal,
1265        &q.subject,
1266        &s.user_aliases,
1267        q.include_as_object,
1268        q.predicate.as_deref(),
1269        q.since_ms,
1270        q.until_ms,
1271        q.limit,
1272    )
1273    .await
1274    .map_err(ApiError::from)?;
1275    Ok(Json(hits))
1276}
1277
/// Query-string parameters for `GET /memory/contradictions`.
#[derive(Debug, Deserialize)]
struct ContradictionsQuery {
    /// Requested number of hits; filled in by `default_limit` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1283
1284async fn contradictions_handler(
1285    TenantExtractor(tenant): TenantExtractor,
1286    AuditPrincipal(principal): AuditPrincipal,
1287    Query(q): Query<ContradictionsQuery>,
1288) -> Result<Json<Vec<solo_query::ContradictionHit>>, ApiError> {
1289    let hits = solo_query::contradictions(tenant.read(), tenant.audit(), principal, q.limit)
1290        .await
1291        .map_err(ApiError::from)?;
1292    Ok(Json(hits))
1293}
1294
/// Query-string parameters for `GET /memory/clusters/{cluster_id}`.
#[derive(Debug, Deserialize, Default)]
struct InspectClusterQuery {
    /// Default `false` — episode `content` is truncated to
    /// `solo_query::EPISODE_TRUNCATE_CHARS` chars with a trailing `…`.
    /// `?full_content=true` returns each episode's content verbatim.
    #[serde(default)]
    full_content: bool,
}
1303
1304async fn inspect_cluster_handler(
1305    TenantExtractor(tenant): TenantExtractor,
1306    AuditPrincipal(principal): AuditPrincipal,
1307    Path(cluster_id): Path<String>,
1308    Query(q): Query<InspectClusterQuery>,
1309) -> Result<Json<solo_query::ClusterRecord>, ApiError> {
1310    if cluster_id.trim().is_empty() {
1311        return Err(ApiError::bad_request("cluster_id must not be empty"));
1312    }
1313    let record = solo_query::inspect_cluster(
1314        tenant.read(),
1315        tenant.audit(),
1316        principal,
1317        &cluster_id,
1318        q.full_content,
1319    )
1320    .await
1321    .map_err(ApiError::from)?;
1322    Ok(Json(record))
1323}
1324
1325// ---------------------------------------------------------------------------
1326// Document handlers (v0.7.0 P6)
1327// ---------------------------------------------------------------------------
1328
/// JSON request body for `POST /memory/documents` (ingest a file).
#[derive(Debug, Deserialize)]
struct IngestDocumentBody {
    /// Server-side absolute path to the file. Must be readable by the
    /// Solo process. The writer reads, parses, chunks, and embeds.
    /// `ingest_document_handler` rejects values that are empty after
    /// trimming.
    path: String,
}
1335
1336async fn ingest_document_handler(
1337    TenantExtractor(tenant): TenantExtractor,
1338    AuditPrincipal(principal): AuditPrincipal,
1339    Json(body): Json<IngestDocumentBody>,
1340) -> Result<Json<solo_storage::IngestReport>, ApiError> {
1341    if body.path.trim().is_empty() {
1342        return Err(ApiError::bad_request("path must not be empty"));
1343    }
1344    let path = std::path::PathBuf::from(body.path);
1345    let chunk_config = solo_storage::document::ChunkConfig::default();
1346    let report = tenant
1347        .write()
1348        .ingest_document_as(principal, path, chunk_config)
1349        .await
1350        .map_err(ApiError::from)?;
1351    Ok(Json(report))
1352}
1353
/// JSON request body for `POST /memory/documents/search`.
#[derive(Debug, Deserialize)]
struct SearchDocsBody {
    /// Query text, forwarded unmodified to `solo_query::run_doc_search`.
    query: String,
    /// Requested number of hits; filled in by `default_limit` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1360
1361async fn search_docs_handler(
1362    TenantExtractor(tenant): TenantExtractor,
1363    AuditPrincipal(principal): AuditPrincipal,
1364    Json(body): Json<SearchDocsBody>,
1365) -> Result<Json<Vec<solo_query::DocSearchHit>>, ApiError> {
1366    let hits = solo_query::run_doc_search(tenant.as_ref(), principal, &body.query, body.limit)
1367        .await
1368        .map_err(ApiError::from)?;
1369    Ok(Json(hits))
1370}
1371
1372async fn inspect_document_handler(
1373    TenantExtractor(tenant): TenantExtractor,
1374    AuditPrincipal(principal): AuditPrincipal,
1375    Path(id): Path<String>,
1376) -> Result<Json<solo_query::DocumentInspectResult>, ApiError> {
1377    let doc_id = DocumentId::from_str(&id)
1378        .map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1379    let result_opt =
1380        solo_query::inspect_document(tenant.read(), tenant.audit(), principal, &doc_id)
1381            .await
1382            .map_err(ApiError::from)?;
1383    match result_opt {
1384        Some(record) => Ok(Json(record)),
1385        None => Err(ApiError::not_found(format!("document {doc_id} not found"))),
1386    }
1387}
1388
/// Query-string parameters for `GET /memory/documents` (pagination).
#[derive(Debug, Deserialize)]
struct ListDocumentsQuery {
    /// Page size; filled in by `default_list_documents_limit` when omitted.
    #[serde(default = "default_list_documents_limit")]
    limit: usize,
    /// Number of rows to skip; `0` when omitted.
    #[serde(default)]
    offset: usize,
    /// Forwarded to `solo_query::list_documents`; `false` when omitted.
    #[serde(default)]
    include_forgotten: bool,
}
1398
/// Serde fallback for `ListDocumentsQuery::limit` when the request omits
/// the parameter.
fn default_list_documents_limit() -> usize {
    const FALLBACK_PAGE_SIZE: usize = 20;
    FALLBACK_PAGE_SIZE
}
1402
1403async fn list_documents_handler(
1404    TenantExtractor(tenant): TenantExtractor,
1405    AuditPrincipal(principal): AuditPrincipal,
1406    Query(q): Query<ListDocumentsQuery>,
1407) -> Result<Json<Vec<solo_query::DocumentSummary>>, ApiError> {
1408    let rows = solo_query::list_documents(
1409        tenant.read(),
1410        tenant.audit(),
1411        principal,
1412        q.limit,
1413        q.offset,
1414        q.include_forgotten,
1415    )
1416    .await
1417    .map_err(ApiError::from)?;
1418    Ok(Json(rows))
1419}
1420
1421async fn forget_document_handler(
1422    TenantExtractor(tenant): TenantExtractor,
1423    AuditPrincipal(principal): AuditPrincipal,
1424    Path(id): Path<String>,
1425) -> Result<Json<solo_storage::ForgetDocumentReport>, ApiError> {
1426    let doc_id = DocumentId::from_str(&id)
1427        .map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1428    let report = tenant
1429        .write()
1430        .forget_document_as(principal, doc_id)
1431        .await
1432        .map_err(ApiError::from)?;
1433    Ok(Json(report))
1434}
1435
/// Query-string parameters for `DELETE /memory/{id}`.
#[derive(Debug, Deserialize)]
struct ForgetQuery {
    /// Optional free-text reason; `forget_handler` substitutes `"http"`
    /// when the parameter is omitted.
    #[serde(default)]
    reason: Option<String>,
}
1441
1442async fn forget_handler(
1443    TenantExtractor(tenant): TenantExtractor,
1444    AuditPrincipal(principal): AuditPrincipal,
1445    Path(id): Path<String>,
1446    Query(q): Query<ForgetQuery>,
1447) -> Result<StatusCode, ApiError> {
1448    let mid = MemoryId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1449    let reason = q.reason.unwrap_or_else(|| "http".into());
1450    tenant
1451        .write()
1452        .forget_as(principal, mid, reason)
1453        .await
1454        .map_err(ApiError::from)?;
1455    Ok(StatusCode::NO_CONTENT)
1456}
1457
1458async fn consolidate_handler(
1459    TenantExtractor(tenant): TenantExtractor,
1460    AuditPrincipal(principal): AuditPrincipal,
1461    body: axum::body::Bytes,
1462) -> Result<Json<solo_storage::ConsolidationReport>, ApiError> {
1463    // Empty body = default scope (unbounded window). We parse via
1464    // `Bytes` rather than `Option<Json<T>>` because axum's `Json`
1465    // extractor 400s on an empty body when Content-Type is JSON
1466    // (it can't deserialize zero bytes as `T`), and the `Option`
1467    // wrapper doesn't reliably degrade that failure to `None`.
1468    let scope = if body.is_empty() {
1469        solo_storage::ConsolidationScope::default()
1470    } else {
1471        serde_json::from_slice(&body)
1472            .map_err(|e| ApiError::bad_request(format!("invalid JSON: {e}")))?
1473    };
1474    let report = tenant
1475        .write()
1476        .consolidate_as(principal, scope)
1477        .await
1478        .map_err(ApiError::from)?;
1479    Ok(Json(report))
1480}
1481
/// JSON request body for `POST /backup`.
#[derive(Debug, Deserialize)]
struct BackupBody {
    /// Server-side absolute path where the backup file should be
    /// written. Must be writable by the Solo process. Refuses to
    /// overwrite an existing file unless `force = true`.
    to: String,
    /// When `true`, an existing file at `to` is removed before the backup
    /// runs. `false` when omitted.
    #[serde(default)]
    force: bool,
}
1491
/// JSON response body for `POST /backup`.
#[derive(Debug, Serialize)]
struct BackupResponse {
    /// Destination path, rendered with `Path::display`.
    path: String,
    /// Wall-clock duration of the writer's `backup` call, in milliseconds.
    elapsed_ms: u64,
}
1497
/// `POST /backup` — write a backup of the tenant database to a
/// server-side path.
///
/// Pre-flight checks run in a fixed order before the writer is invoked:
/// 1. `to` must be non-empty;
/// 2. `to` must not refer to the live database file;
/// 3. an existing destination is refused unless `force` is set, in which
///    case it is removed;
/// 4. the destination's parent directory (when it has one) must exist.
///
/// Returns the destination path and how long the `backup` call took.
/// Unlike the other write handlers in this file, this one takes no
/// `AuditPrincipal` extractor.
async fn backup_handler(
    TenantExtractor(tenant): TenantExtractor,
    Json(body): Json<BackupBody>,
) -> Result<Json<BackupResponse>, ApiError> {
    use std::path::PathBuf;

    let dest = PathBuf::from(&body.to);
    if dest.as_os_str().is_empty() {
        return Err(ApiError::bad_request("`to` must not be empty"));
    }
    // CRITICAL ORDER: same-file refusal MUST come BEFORE `remove_file`.
    // The tenant's source DB path comes from the resolved TenantHandle.
    // If the order were swapped, `force=true` aimed at the live database
    // would delete it before the refusal could fire.
    if solo_storage::paths_refer_to_same_file(tenant.db_path(), &dest) {
        return Err(ApiError::bad_request(format!(
            "destination {} is the same file as the source database; \
             refusing to run (would corrupt the live database)",
            dest.display()
        )));
    }
    if dest.exists() {
        if !body.force {
            return Err(ApiError::bad_request(format!(
                "destination {} exists; pass force=true to overwrite",
                dest.display()
            )));
        }
        // NOTE(review): the previous file is deleted before the new backup
        // is written, so a failed backup leaves no file at `dest`. This is
        // also a synchronous filesystem call inside an async handler.
        std::fs::remove_file(&dest).map_err(|e| {
            ApiError::internal(format!(
                "remove existing destination {}: {e}",
                dest.display()
            ))
        })?;
    }
    // A bare file name has an empty parent; that case is allowed through
    // and only a non-empty, non-directory parent is rejected.
    if let Some(parent) = dest.parent() {
        if !parent.as_os_str().is_empty() && !parent.is_dir() {
            return Err(ApiError::bad_request(format!(
                "destination parent directory {} does not exist",
                parent.display()
            )));
        }
    }

    // Time only the writer call, not the validation above.
    let started = std::time::Instant::now();
    tenant.write().backup(dest.clone()).await.map_err(ApiError::from)?;
    // `as_millis` yields u128; the cast narrows it to the u64 wire field.
    let elapsed_ms = started.elapsed().as_millis() as u64;

    Ok(Json(BackupResponse {
        path: dest.display().to_string(),
        elapsed_ms,
    }))
}
1549
1550// ---------------------------------------------------------------------------
1551// Graph expand (v0.9.x — first /v1/graph/* endpoint for solo-web)
1552// ---------------------------------------------------------------------------
1553//
1554// `GET /v1/graph/expand?node_id=...&kind=...&limit=N` — read-only neighbor
1555// drill off any node. Supports four edge kinds:
1556//   * `cluster_member` — episodes ↔ clusters via `cluster_episodes`.
1557//   * `document_chunk` — documents ↔ chunks via `document_chunks.doc_id`.
1558//   * `triple`         — episodes ↔ entities via `triples` (subject_id /
1559//     object_id / source_episode_id added in migration 0007).
1560//   * `semantic`       — HNSW top-K similar episodes (re-embeds the source
1561//     episode's content via the tenant embedder, then calls the same
1562//     pipeline as `/memory/search`; cheaper than a separate embeddings-
1563//     table fetch path and reuses one well-tested code path).
1564//
1565// **Node-id prefix convention** (locked in this PR; the future
1566// `/v1/graph/nodes` + `/v1/graph/inspect/:id` endpoints will use the
1567// same scheme):
1568//   * `ep:<memory_id>`     — episode (memory_id = UUID v7)
1569//   * `doc:<doc_id>`       — document (doc_id   = UUID v7)
1570//   * `chunk:<chunk_id>`   — chunk    (chunk_id = UUID v7)
1571//   * `cl:<cluster_id>`    — cluster
1572//   * `ent:<value>`        — entity (synthetic — minted from a triple's
1573//     subject_id / object_id; value is the raw string verbatim, no
1574//     URL-encoding — `:` and other punctuation appear in real entity
1575//     ids in the wild).
1576//
1577// Entity nodes are synthetic: there's no `entities` table. They're derived
1578// on-the-fly from triples and only exist in the wire format. Two entity
1579// nodes with the same `ent:<value>` are the same node.
1580//
1581// **Read-only**: no audit emit (lesson #30 — graph expand is a derived view
1582// over already-audited primitives; the explicit-query audit events from
1583// `memory.recall` / `memory.inspect` / `memory.facts_about` cover the
1584// underlying reads).
1585//
1586// Tests live inline in `handler_tests` below.
1587
/// Neighbor cap that `graph_expand_handler` applies when the request
/// omits `limit`.
const GRAPH_EXPAND_DEFAULT_LIMIT: u32 = 25;
/// Upper bound that `graph_expand_handler` silently clamps `limit` to.
const GRAPH_EXPAND_MAX_LIMIT: u32 = 100;
1590
/// Edge-kind discriminator. Drives which expansion path runs and what edge
/// kind appears in the response.
///
/// Deserialized from the `kind` query parameter using snake_case names
/// (`cluster_member`, `document_chunk`, `triple`, `semantic`).
#[derive(Debug, Clone, Copy, Deserialize)]
#[serde(rename_all = "snake_case")]
enum GraphExpandKind {
    /// Episodes ↔ clusters via `cluster_episodes`.
    ClusterMember,
    /// Documents ↔ chunks via `document_chunks.doc_id`.
    DocumentChunk,
    /// Episodes ↔ entities via `triples`.
    Triple,
    /// HNSW top-K similar episodes.
    Semantic,
}
1601
/// Query-string parameters for `GET /v1/graph/expand`.
#[derive(Debug, Deserialize)]
struct GraphExpandQuery {
    /// Source node in `<prefix>:<value>` form; decoded by `parse_node_id`.
    node_id: String,
    /// Which edge kind to expand along.
    kind: GraphExpandKind,
    /// Optional neighbor cap; the handler falls back to
    /// `GRAPH_EXPAND_DEFAULT_LIMIT` and clamps to `GRAPH_EXPAND_MAX_LIMIT`.
    #[serde(default)]
    limit: Option<u32>,
}
1609
/// Source-node kind, derived from the `node_id` prefix.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NodeKind {
    /// `ep:` prefix.
    Episode,
    /// `doc:` prefix.
    Document,
    /// `chunk:` prefix.
    Chunk,
    /// `cl:` prefix.
    Cluster,
    /// `ent:` prefix.
    Entity,
}
1619
1620impl NodeKind {
1621    fn as_wire_str(self) -> &'static str {
1622        match self {
1623            Self::Episode => "episode",
1624            Self::Document => "document",
1625            Self::Chunk => "chunk",
1626            Self::Cluster => "cluster",
1627            Self::Entity => "entity",
1628        }
1629    }
1630}
1631
1632/// Decompose `<prefix>:<value>` into (kind, raw value). Returns 400 on
1633/// unknown prefix / empty value / no `:`.
1634fn parse_node_id(raw: &str) -> Result<(NodeKind, &str), ApiError> {
1635    let (prefix, value) = raw.split_once(':').ok_or_else(|| {
1636        ApiError::bad_request(format!(
1637            "node_id must be `<prefix>:<value>` (one of ep:/doc:/chunk:/cl:/ent:); got {raw:?}"
1638        ))
1639    })?;
1640    if value.is_empty() {
1641        return Err(ApiError::bad_request(format!(
1642            "node_id value is empty after prefix: {raw:?}"
1643        )));
1644    }
1645    let kind = match prefix {
1646        "ep" => NodeKind::Episode,
1647        "doc" => NodeKind::Document,
1648        "chunk" => NodeKind::Chunk,
1649        "cl" => NodeKind::Cluster,
1650        "ent" => NodeKind::Entity,
1651        other => {
1652            return Err(ApiError::bad_request(format!(
1653                "unknown node_id prefix {other:?}; expected one of ep:/doc:/chunk:/cl:/ent:"
1654            )));
1655        }
1656    };
1657    Ok((kind, value))
1658}
1659
/// One node in the graph-expand response. Mirrors solo-web's `GraphNode`
/// TS interface (see `solo-web/src/api/types.ts`).
#[derive(Debug, Serialize)]
struct GraphNode {
    /// Prefixed node id, e.g. `ep:<memory_id>` or `cl:<cluster_id>`.
    id: String,
    /// Wire name of the node kind (see `NodeKind::as_wire_str`).
    kind: &'static str,
    /// Short display label.
    label: String,
    /// Timestamp in milliseconds; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    ts_ms: Option<i64>,
    /// Id of the tenant the node was read from.
    tenant_id: String,
    /// Longer preview text; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    preview: Option<String>,
}
1673
/// One edge. Mirrors `GraphEdge` in solo-web TS types. `id` is a composite
/// `${source}--${kind}--${target}` so the renderer can dedupe.
#[derive(Debug, Serialize)]
struct GraphEdge {
    /// Composite id built by `edge_id`.
    id: String,
    /// Prefixed id of the node the expansion started from.
    source: String,
    /// Prefixed id of the neighbor node.
    target: String,
    /// Edge kind as a wire string (e.g. `"cluster_member"`).
    kind: &'static str,
    /// Omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    predicate: Option<String>,
    /// Omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    weight: Option<f32>,
}
1687
/// JSON response body for `GET /v1/graph/expand`.
#[derive(Debug, Serialize)]
struct GraphExpandResponse {
    /// Neighbor nodes discovered by the expansion.
    nodes: Vec<GraphNode>,
    /// Edges linking the source node to the returned neighbors.
    edges: Vec<GraphEdge>,
}
1693
/// Build the composite edge id `<source>--<kind>--<target>`.
fn edge_id(source: &str, kind: &str, target: &str) -> String {
    [source, kind, target].join("--")
}
1697
/// Episode summary needed to mint a `GraphNode` from an episode row.
#[derive(Debug)]
struct ExpandedEpisode {
    /// Episode id; becomes the node id `ep:<memory_id>`.
    memory_id: String,
    /// Episode timestamp in milliseconds.
    ts_ms: i64,
    /// Full episode text; label and preview are derived from it.
    content: String,
}
1705
/// Document summary needed to mint a `GraphNode` from a document row.
#[derive(Debug)]
struct ExpandedDocument {
    /// Document id; becomes the node id `doc:<doc_id>`.
    doc_id: String,
    /// Preferred label source when present.
    title: Option<String>,
    /// Label fallback when `title` is absent; also used as the preview.
    source: Option<String>,
    /// Ingestion timestamp in milliseconds.
    ingested_at_ms: i64,
}
1714
/// Chunk summary needed to mint a `GraphNode` from a chunk row.
#[derive(Debug)]
struct ExpandedChunk {
    /// Chunk id; becomes the node id `chunk:<chunk_id>`.
    chunk_id: String,
    /// Index shown in the node label as `chunk #<index>`.
    chunk_index: i64,
    /// Full chunk text; label and preview are derived from it.
    content: String,
}
1722
/// Shorten `s` to at most `max` characters (Unicode scalar values, not
/// bytes).
///
/// A string that already fits is returned unchanged. Otherwise the first
/// `max - 1` characters are kept and a single `…` is appended, so the
/// result is exactly `max` characters long. `max == 0` yields an empty
/// string; previously that case underflowed `max - 1` for any non-empty
/// input.
fn truncate_preview(s: &str, max: usize) -> String {
    if max == 0 {
        return String::new();
    }
    // `nth(max)` is `None` exactly when `s` has at most `max` characters,
    // and it stops scanning after `max + 1` characters.
    if s.chars().nth(max).is_none() {
        return s.to_string();
    }
    let mut out: String = s.chars().take(max - 1).collect();
    out.push('…');
    out
}
1731
/// First-line label cap. Keeps payloads tight for the graph renderer
/// (labels are headings, not full content).
const GRAPH_LABEL_CHARS: usize = 80;
/// Character cap for the `preview` text attached to graph nodes.
const GRAPH_PREVIEW_CHARS: usize = 200;
1736
1737fn episode_label(content: &str) -> String {
1738    let first_line = content.lines().next().unwrap_or(content);
1739    truncate_preview(first_line, GRAPH_LABEL_CHARS)
1740}
1741
1742fn graph_node_for_episode(tenant_id: &str, ep: &ExpandedEpisode) -> GraphNode {
1743    GraphNode {
1744        id: format!("ep:{}", ep.memory_id),
1745        kind: NodeKind::Episode.as_wire_str(),
1746        label: episode_label(&ep.content),
1747        ts_ms: Some(ep.ts_ms),
1748        tenant_id: tenant_id.to_string(),
1749        preview: Some(truncate_preview(&ep.content, GRAPH_PREVIEW_CHARS)),
1750    }
1751}
1752
1753fn graph_node_for_document(tenant_id: &str, d: &ExpandedDocument) -> GraphNode {
1754    let label = d
1755        .title
1756        .clone()
1757        .or_else(|| d.source.clone())
1758        .unwrap_or_else(|| d.doc_id.clone());
1759    GraphNode {
1760        id: format!("doc:{}", d.doc_id),
1761        kind: NodeKind::Document.as_wire_str(),
1762        label: truncate_preview(&label, GRAPH_LABEL_CHARS),
1763        ts_ms: Some(d.ingested_at_ms),
1764        tenant_id: tenant_id.to_string(),
1765        preview: d.source.clone(),
1766    }
1767}
1768
1769fn graph_node_for_chunk(tenant_id: &str, c: &ExpandedChunk) -> GraphNode {
1770    GraphNode {
1771        id: format!("chunk:{}", c.chunk_id),
1772        kind: NodeKind::Chunk.as_wire_str(),
1773        label: format!("chunk #{}: {}", c.chunk_index, episode_label(&c.content)),
1774        ts_ms: None,
1775        tenant_id: tenant_id.to_string(),
1776        preview: Some(truncate_preview(&c.content, GRAPH_PREVIEW_CHARS)),
1777    }
1778}
1779
1780fn graph_node_for_cluster(
1781    tenant_id: &str,
1782    cluster_id: &str,
1783    abstraction: Option<&str>,
1784    created_at_ms: i64,
1785) -> GraphNode {
1786    let label = abstraction
1787        .map(|a| truncate_preview(a, GRAPH_LABEL_CHARS))
1788        .unwrap_or_else(|| format!("cluster {cluster_id}"));
1789    GraphNode {
1790        id: format!("cl:{cluster_id}"),
1791        kind: NodeKind::Cluster.as_wire_str(),
1792        label,
1793        ts_ms: Some(created_at_ms),
1794        tenant_id: tenant_id.to_string(),
1795        preview: abstraction.map(|a| truncate_preview(a, GRAPH_PREVIEW_CHARS)),
1796    }
1797}
1798
1799fn graph_node_for_entity(tenant_id: &str, value: &str) -> GraphNode {
1800    GraphNode {
1801        id: format!("ent:{value}"),
1802        kind: NodeKind::Entity.as_wire_str(),
1803        label: truncate_preview(value, GRAPH_LABEL_CHARS),
1804        ts_ms: None,
1805        tenant_id: tenant_id.to_string(),
1806        preview: None,
1807    }
1808}
1809
1810/// `GET /v1/graph/expand`. See module-level comments for the contract.
1811async fn graph_expand_handler(
1812    TenantExtractor(tenant): TenantExtractor,
1813    Query(q): Query<GraphExpandQuery>,
1814) -> Result<Json<GraphExpandResponse>, ApiError> {
1815    // Silent clamp at GRAPH_EXPAND_MAX_LIMIT — matches the rest of
1816    // solo-query's read pipelines (recall, themes, etc.). Documented in
1817    // the OpenAPI spec.
1818    let limit = q.limit.unwrap_or(GRAPH_EXPAND_DEFAULT_LIMIT);
1819    let limit = limit.clamp(1, GRAPH_EXPAND_MAX_LIMIT) as i64;
1820
1821    let (node_kind, value) = parse_node_id(&q.node_id)?;
1822    let value = value.to_string();
1823    let node_id_full = q.node_id.clone();
1824    let tenant_id_str = tenant.tenant_id().to_string();
1825
1826    match q.kind {
1827        GraphExpandKind::ClusterMember => {
1828            expand_cluster_member(&tenant, &tenant_id_str, node_kind, &value, &node_id_full, limit)
1829                .await
1830        }
1831        GraphExpandKind::DocumentChunk => {
1832            expand_document_chunk(&tenant, &tenant_id_str, node_kind, &value, &node_id_full, limit)
1833                .await
1834        }
1835        GraphExpandKind::Triple => {
1836            expand_triple(&tenant, &tenant_id_str, node_kind, &value, &node_id_full, limit).await
1837        }
1838        GraphExpandKind::Semantic => {
1839            expand_semantic(&tenant, &tenant_id_str, node_kind, &value, &node_id_full, limit).await
1840        }
1841    }
1842    .map(Json)
1843}
1844
1845// ---- cluster_member ----
1846
1847async fn expand_cluster_member(
1848    tenant: &TenantHandle,
1849    tenant_id: &str,
1850    node_kind: NodeKind,
1851    value: &str,
1852    node_id_full: &str,
1853    limit: i64,
1854) -> Result<GraphExpandResponse, ApiError> {
1855    match node_kind {
1856        NodeKind::Episode => expand_cluster_member_from_episode(
1857            tenant,
1858            tenant_id,
1859            value.to_string(),
1860            node_id_full.to_string(),
1861            limit,
1862        )
1863        .await,
1864        NodeKind::Cluster => expand_cluster_member_from_cluster(
1865            tenant,
1866            tenant_id,
1867            value.to_string(),
1868            node_id_full.to_string(),
1869            limit,
1870        )
1871        .await,
1872        _ => Err(ApiError::bad_request(format!(
1873            "kind=cluster_member only valid for episode or cluster source nodes; got {}",
1874            node_kind.as_wire_str()
1875        ))),
1876    }
1877}
1878
/// Expand an episode node into the clusters it belongs to.
///
/// Queries `cluster_episodes` joined with `clusters` (and, via LEFT JOIN,
/// `semantic_abstractions`) for the given `memory_id`, newest cluster
/// first, capped at `limit`. Each row becomes one cluster node plus one
/// `cluster_member` edge from `node_id_full` to `cl:<cluster_id>`.
///
/// When no rows come back, `ensure_episode_exists` is consulted before
/// returning an empty response, so that a missing episode can be reported
/// as an error rather than as "episode with no clusters".
async fn expand_cluster_member_from_episode(
    tenant: &TenantHandle,
    tenant_id: &str,
    memory_id: String,
    node_id_full: String,
    limit: i64,
) -> Result<GraphExpandResponse, ApiError> {
    // `memory_id` is moved into the closure below; keep a copy for the
    // existence probe that may run afterwards.
    let memory_id_for_err = memory_id.clone();
    let rows: Vec<(String, Option<String>, i64)> = tenant
        .read()
        .interact(move |conn| {
            // Skip the join entirely when the episode row is absent. An
            // empty Vec is returned in that case, which is resolved into
            // an error (or not) by the probe after this closure.
            let exists: i64 = conn.query_row(
                "SELECT COUNT(*) FROM episodes WHERE memory_id = ?1",
                rusqlite::params![&memory_id],
                |r| r.get(0),
            )?;
            if exists == 0 {
                return Ok(Vec::new());
            }
            let mut stmt = conn.prepare(
                "SELECT c.cluster_id, sa.content, c.created_at_ms
                   FROM cluster_episodes ce
                   JOIN clusters c ON c.cluster_id = ce.cluster_id
                   LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
                  WHERE ce.memory_id = ?1
                  ORDER BY c.created_at_ms DESC
                  LIMIT ?2",
            )?;
            // Columns: cluster id, optional abstraction text (NULL when the
            // LEFT JOIN finds no abstraction), cluster creation time.
            let mapped = stmt
                .query_map(rusqlite::params![&memory_id, limit], |r| {
                    Ok((
                        r.get::<_, String>(0)?,
                        r.get::<_, Option<String>>(1)?,
                        r.get::<_, i64>(2)?,
                    ))
                })?
                .collect::<rusqlite::Result<Vec<_>>>()?;
            Ok::<_, rusqlite::Error>(mapped)
        })
        .await
        .map_err(ApiError::from)?;

    // An empty result is ambiguous: either the episode does not exist or
    // it exists but belongs to no cluster. `ensure_episode_exists` (defined
    // outside this block — presumably yields a 404-style error on a miss;
    // confirm there) separates the two; if it passes, answer 200 with
    // empty arrays.
    if rows.is_empty() {
        ensure_episode_exists(tenant, &memory_id_for_err, &node_id_full).await?;
        return Ok(GraphExpandResponse {
            nodes: Vec::new(),
            edges: Vec::new(),
        });
    }

    // One edge and one cluster node per row, in query order.
    let mut nodes = Vec::with_capacity(rows.len());
    let mut edges = Vec::with_capacity(rows.len());
    for (cluster_id, abstraction, created_at_ms) in rows {
        let target_id = format!("cl:{cluster_id}");
        edges.push(GraphEdge {
            id: edge_id(&node_id_full, "cluster_member", &target_id),
            source: node_id_full.clone(),
            target: target_id,
            kind: "cluster_member",
            predicate: None,
            weight: None,
        });
        nodes.push(graph_node_for_cluster(
            tenant_id,
            &cluster_id,
            abstraction.as_deref(),
            created_at_ms,
        ));
    }
    Ok(GraphExpandResponse { nodes, edges })
}
1963
1964async fn expand_cluster_member_from_cluster(
1965    tenant: &TenantHandle,
1966    tenant_id: &str,
1967    cluster_id: String,
1968    node_id_full: String,
1969    limit: i64,
1970) -> Result<GraphExpandResponse, ApiError> {
1971    let cluster_id_for_err = cluster_id.clone();
1972    let rows: Vec<ExpandedEpisode> = tenant
1973        .read()
1974        .interact(move |conn| {
1975            let exists: i64 = conn.query_row(
1976                "SELECT COUNT(*) FROM clusters WHERE cluster_id = ?1",
1977                rusqlite::params![&cluster_id],
1978                |r| r.get(0),
1979            )?;
1980            if exists == 0 {
1981                return Ok(Vec::new());
1982            }
1983            let mut stmt = conn.prepare(
1984                "SELECT e.memory_id, e.ts_ms, e.content
1985                   FROM cluster_episodes ce
1986                   JOIN episodes e ON e.memory_id = ce.memory_id
1987                  WHERE ce.cluster_id = ?1
1988                    AND e.status = 'active'
1989                  ORDER BY e.ts_ms DESC
1990                  LIMIT ?2",
1991            )?;
1992            let mapped = stmt
1993                .query_map(rusqlite::params![&cluster_id, limit], |r| {
1994                    Ok(ExpandedEpisode {
1995                        memory_id: r.get(0)?,
1996                        ts_ms: r.get(1)?,
1997                        content: r.get(2)?,
1998                    })
1999                })?
2000                .collect::<rusqlite::Result<Vec<_>>>()?;
2001            Ok::<_, rusqlite::Error>(mapped)
2002        })
2003        .await
2004        .map_err(ApiError::from)?;
2005
2006    if rows.is_empty() {
2007        ensure_cluster_exists(tenant, &cluster_id_for_err, &node_id_full).await?;
2008        return Ok(GraphExpandResponse {
2009            nodes: Vec::new(),
2010            edges: Vec::new(),
2011        });
2012    }
2013
2014    let mut nodes = Vec::with_capacity(rows.len());
2015    let mut edges = Vec::with_capacity(rows.len());
2016    for ep in rows {
2017        let target_id = format!("ep:{}", ep.memory_id);
2018        edges.push(GraphEdge {
2019            id: edge_id(&node_id_full, "cluster_member", &target_id),
2020            source: node_id_full.clone(),
2021            target: target_id,
2022            kind: "cluster_member",
2023            predicate: None,
2024            weight: None,
2025        });
2026        nodes.push(graph_node_for_episode(tenant_id, &ep));
2027    }
2028    Ok(GraphExpandResponse { nodes, edges })
2029}
2030
2031// ---- document_chunk ----
2032
2033async fn expand_document_chunk(
2034    tenant: &TenantHandle,
2035    tenant_id: &str,
2036    node_kind: NodeKind,
2037    value: &str,
2038    node_id_full: &str,
2039    limit: i64,
2040) -> Result<GraphExpandResponse, ApiError> {
2041    match node_kind {
2042        NodeKind::Document => expand_document_chunk_from_document(
2043            tenant,
2044            tenant_id,
2045            value.to_string(),
2046            node_id_full.to_string(),
2047            limit,
2048        )
2049        .await,
2050        NodeKind::Chunk => expand_document_chunk_from_chunk(
2051            tenant,
2052            tenant_id,
2053            value.to_string(),
2054            node_id_full.to_string(),
2055        )
2056        .await,
2057        _ => Err(ApiError::bad_request(format!(
2058            "kind=document_chunk only valid for document or chunk source nodes; got {}",
2059            node_kind.as_wire_str()
2060        ))),
2061    }
2062}
2063
2064async fn expand_document_chunk_from_document(
2065    tenant: &TenantHandle,
2066    tenant_id: &str,
2067    doc_id: String,
2068    node_id_full: String,
2069    limit: i64,
2070) -> Result<GraphExpandResponse, ApiError> {
2071    let doc_id_for_err = doc_id.clone();
2072    let rows: Vec<ExpandedChunk> = tenant
2073        .read()
2074        .interact(move |conn| {
2075            let exists: i64 = conn.query_row(
2076                "SELECT COUNT(*) FROM documents WHERE doc_id = ?1",
2077                rusqlite::params![&doc_id],
2078                |r| r.get(0),
2079            )?;
2080            if exists == 0 {
2081                return Ok(Vec::new());
2082            }
2083            let mut stmt = conn.prepare(
2084                "SELECT chunk_id, chunk_index, content
2085                   FROM document_chunks
2086                  WHERE doc_id = ?1
2087                  ORDER BY chunk_index ASC
2088                  LIMIT ?2",
2089            )?;
2090            let mapped = stmt
2091                .query_map(rusqlite::params![&doc_id, limit], |r| {
2092                    Ok(ExpandedChunk {
2093                        chunk_id: r.get(0)?,
2094                        chunk_index: r.get(1)?,
2095                        content: r.get(2)?,
2096                    })
2097                })?
2098                .collect::<rusqlite::Result<Vec<_>>>()?;
2099            Ok::<_, rusqlite::Error>(mapped)
2100        })
2101        .await
2102        .map_err(ApiError::from)?;
2103
2104    if rows.is_empty() {
2105        ensure_document_exists(tenant, &doc_id_for_err, &node_id_full).await?;
2106        return Ok(GraphExpandResponse {
2107            nodes: Vec::new(),
2108            edges: Vec::new(),
2109        });
2110    }
2111
2112    let mut nodes = Vec::with_capacity(rows.len());
2113    let mut edges = Vec::with_capacity(rows.len());
2114    for c in rows {
2115        let target_id = format!("chunk:{}", c.chunk_id);
2116        edges.push(GraphEdge {
2117            id: edge_id(&node_id_full, "document_chunk", &target_id),
2118            source: node_id_full.clone(),
2119            target: target_id,
2120            kind: "document_chunk",
2121            predicate: None,
2122            weight: None,
2123        });
2124        nodes.push(graph_node_for_chunk(tenant_id, &c));
2125    }
2126    Ok(GraphExpandResponse { nodes, edges })
2127}
2128
2129async fn expand_document_chunk_from_chunk(
2130    tenant: &TenantHandle,
2131    tenant_id: &str,
2132    chunk_id: String,
2133    node_id_full: String,
2134) -> Result<GraphExpandResponse, ApiError> {
2135    let chunk_id_for_err = chunk_id.clone();
2136    let row: Option<ExpandedDocument> = tenant
2137        .read()
2138        .interact(move |conn| {
2139            conn.query_row(
2140                "SELECT d.doc_id, d.title, d.source, d.ingested_at_ms
2141                   FROM document_chunks c
2142                   JOIN documents d ON d.doc_id = c.doc_id
2143                  WHERE c.chunk_id = ?1",
2144                rusqlite::params![&chunk_id],
2145                |r| {
2146                    Ok(ExpandedDocument {
2147                        doc_id: r.get(0)?,
2148                        title: r.get(1)?,
2149                        source: r.get(2)?,
2150                        ingested_at_ms: r.get(3)?,
2151                    })
2152                },
2153            )
2154            .map(Some)
2155            .or_else(|e| match e {
2156                rusqlite::Error::QueryReturnedNoRows => Ok(None),
2157                other => Err(other),
2158            })
2159        })
2160        .await
2161        .map_err(ApiError::from)?;
2162
2163    let d = row.ok_or_else(|| {
2164        ApiError::not_found(format!(
2165            "node_id {node_id_full:?} (chunk_id {chunk_id_for_err}) not found in current tenant"
2166        ))
2167    })?;
2168    let target_id = format!("doc:{}", d.doc_id);
2169    let edge = GraphEdge {
2170        id: edge_id(&node_id_full, "document_chunk", &target_id),
2171        source: node_id_full.clone(),
2172        target: target_id,
2173        kind: "document_chunk",
2174        predicate: None,
2175        weight: None,
2176    };
2177    let node = graph_node_for_document(tenant_id, &d);
2178    Ok(GraphExpandResponse {
2179        nodes: vec![node],
2180        edges: vec![edge],
2181    })
2182}
2183
2184// ---- triple ----
2185
2186async fn expand_triple(
2187    tenant: &TenantHandle,
2188    tenant_id: &str,
2189    node_kind: NodeKind,
2190    value: &str,
2191    node_id_full: &str,
2192    limit: i64,
2193) -> Result<GraphExpandResponse, ApiError> {
2194    match node_kind {
2195        NodeKind::Episode => expand_triple_from_episode(
2196            tenant,
2197            tenant_id,
2198            value.to_string(),
2199            node_id_full.to_string(),
2200            limit,
2201        )
2202        .await,
2203        NodeKind::Entity => expand_triple_from_entity(
2204            tenant,
2205            tenant_id,
2206            value.to_string(),
2207            node_id_full.to_string(),
2208            limit,
2209        )
2210        .await,
2211        _ => Err(ApiError::bad_request(format!(
2212            "kind=triple only valid for episode or entity source nodes; got {}",
2213            node_kind.as_wire_str()
2214        ))),
2215    }
2216}
2217
/// One row of the `triples` table as read by
/// `expand_triple_from_episode`.
#[derive(Debug)]
struct TripleRow {
    /// `triples.subject_id`; surfaced as the `ent:<subject_id>` node.
    subject_id: String,
    /// `triples.predicate`; copied onto the emitted edge.
    predicate: String,
    /// `triples.object_id`; surfaced as the `ent:<object_id>` node.
    object_id: String,
    /// `triples.confidence`; used as the emitted edge's weight.
    confidence: f32,
}
2225
2226async fn expand_triple_from_episode(
2227    tenant: &TenantHandle,
2228    tenant_id: &str,
2229    memory_id: String,
2230    node_id_full: String,
2231    limit: i64,
2232) -> Result<GraphExpandResponse, ApiError> {
2233    let memory_id_for_err = memory_id.clone();
2234    let rows: Vec<TripleRow> = tenant
2235        .read()
2236        .interact(move |conn| {
2237            // Episode rowid lookup (triples FK is INTEGER rowid, not memory_id).
2238            let rowid_opt: Option<i64> = conn
2239                .query_row(
2240                    "SELECT rowid FROM episodes WHERE memory_id = ?1",
2241                    rusqlite::params![&memory_id],
2242                    |r| r.get(0),
2243                )
2244                .map(Some)
2245                .or_else(|e| match e {
2246                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
2247                    other => Err(other),
2248                })?;
2249            let Some(rowid) = rowid_opt else {
2250                return Ok(Vec::new());
2251            };
2252            let mut stmt = conn.prepare(
2253                "SELECT subject_id, predicate, object_id, confidence
2254                   FROM triples
2255                  WHERE source_episode_id = ?1
2256                    AND status = 'active'
2257                  ORDER BY valid_from_ms DESC
2258                  LIMIT ?2",
2259            )?;
2260            let mapped = stmt
2261                .query_map(rusqlite::params![rowid, limit], |r| {
2262                    Ok(TripleRow {
2263                        subject_id: r.get(0)?,
2264                        predicate: r.get(1)?,
2265                        object_id: r.get(2)?,
2266                        confidence: r.get(3)?,
2267                    })
2268                })?
2269                .collect::<rusqlite::Result<Vec<_>>>()?;
2270            Ok::<_, rusqlite::Error>(mapped)
2271        })
2272        .await
2273        .map_err(ApiError::from)?;
2274
2275    if rows.is_empty() {
2276        ensure_episode_exists(tenant, &memory_id_for_err, &node_id_full).await?;
2277        return Ok(GraphExpandResponse {
2278            nodes: Vec::new(),
2279            edges: Vec::new(),
2280        });
2281    }
2282
2283    let mut nodes = Vec::new();
2284    let mut edges = Vec::new();
2285    let mut seen_entities: std::collections::HashSet<String> = Default::default();
2286    for t in rows {
2287        // Mint both endpoints as entity nodes. The source episode is
2288        // node_id_full; each triple becomes two edges (source→subj +
2289        // subj→obj) connected through the entity nodes, OR a single
2290        // edge labelled with the predicate from the source episode to
2291        // a representative entity. The TS schema treats `triple` as a
2292        // single edge with `predicate`; we emit one edge per triple:
2293        // source_episode → subject_entity (kind=triple, predicate=p),
2294        // plus one extra edge subject_entity → object_entity (also
2295        // kind=triple, same predicate) so a renderer can hop along the
2296        // SPO graph.
2297        let subj_id = format!("ent:{}", t.subject_id);
2298        let obj_id = format!("ent:{}", t.object_id);
2299        if seen_entities.insert(t.subject_id.clone()) {
2300            nodes.push(graph_node_for_entity(tenant_id, &t.subject_id));
2301        }
2302        if seen_entities.insert(t.object_id.clone()) {
2303            nodes.push(graph_node_for_entity(tenant_id, &t.object_id));
2304        }
2305        edges.push(GraphEdge {
2306            id: edge_id(&subj_id, "triple", &obj_id),
2307            source: subj_id,
2308            target: obj_id,
2309            kind: "triple",
2310            predicate: Some(t.predicate),
2311            weight: Some(t.confidence),
2312        });
2313    }
2314    Ok(GraphExpandResponse { nodes, edges })
2315}
2316
2317async fn expand_triple_from_entity(
2318    tenant: &TenantHandle,
2319    tenant_id: &str,
2320    entity_value: String,
2321    node_id_full: String,
2322    limit: i64,
2323) -> Result<GraphExpandResponse, ApiError> {
2324    // Entity nodes are synthetic — there's no existence check we can
2325    // run. "Unknown entity" naturally resolves to an empty result.
2326    let entity_q = entity_value.clone();
2327    let rows: Vec<ExpandedEpisode> = tenant
2328        .read()
2329        .interact(move |conn| {
2330            // Find episodes whose triples reference this entity on either
2331            // side. JOIN against episodes.rowid via triples.source_episode_id.
2332            let mut stmt = conn.prepare(
2333                "SELECT DISTINCT e.memory_id, e.ts_ms, e.content
2334                   FROM triples t
2335                   JOIN episodes e ON e.rowid = t.source_episode_id
2336                  WHERE (t.subject_id = ?1 OR t.object_id = ?1)
2337                    AND t.status = 'active'
2338                    AND t.source_episode_id IS NOT NULL
2339                    AND e.status = 'active'
2340                  ORDER BY e.ts_ms DESC
2341                  LIMIT ?2",
2342            )?;
2343            let mapped = stmt
2344                .query_map(rusqlite::params![&entity_q, limit], |r| {
2345                    Ok(ExpandedEpisode {
2346                        memory_id: r.get(0)?,
2347                        ts_ms: r.get(1)?,
2348                        content: r.get(2)?,
2349                    })
2350                })?
2351                .collect::<rusqlite::Result<Vec<_>>>()?;
2352            Ok::<_, rusqlite::Error>(mapped)
2353        })
2354        .await
2355        .map_err(ApiError::from)?;
2356
2357    // Empty result on entity expand is a valid 200 — the entity exists
2358    // only in the wire format; "no edges" is the right answer.
2359    let mut nodes = Vec::with_capacity(rows.len());
2360    let mut edges = Vec::with_capacity(rows.len());
2361    for ep in rows {
2362        let target_id = format!("ep:{}", ep.memory_id);
2363        edges.push(GraphEdge {
2364            id: edge_id(&node_id_full, "triple", &target_id),
2365            source: node_id_full.clone(),
2366            target: target_id,
2367            kind: "triple",
2368            predicate: None,
2369            weight: None,
2370        });
2371        nodes.push(graph_node_for_episode(tenant_id, &ep));
2372    }
2373    // Annotate _ to suppress unused (only used in match guard).
2374    let _ = entity_value;
2375    Ok(GraphExpandResponse { nodes, edges })
2376}
2377
2378// ---- semantic ----
2379
2380async fn expand_semantic(
2381    tenant: &TenantHandle,
2382    tenant_id: &str,
2383    node_kind: NodeKind,
2384    value: &str,
2385    node_id_full: &str,
2386    limit: i64,
2387) -> Result<GraphExpandResponse, ApiError> {
2388    if node_kind != NodeKind::Episode {
2389        return Err(ApiError::bad_request(format!(
2390            "kind=semantic only valid for episode source nodes; got {}",
2391            node_kind.as_wire_str()
2392        )));
2393    }
2394    let memory_id = value.to_string();
2395    let memory_id_q = memory_id.clone();
2396    // Fetch the source episode's content so we can re-embed it and call
2397    // the existing HNSW pipeline. Cheaper-than-extra-machinery: reuses
2398    // the well-tested `run_recall_inner` path that already filters
2399    // forgotten rows + decodes hnsw ids.
2400    let content: Option<String> = tenant
2401        .read()
2402        .interact(move |conn| {
2403            conn.query_row(
2404                "SELECT content FROM episodes WHERE memory_id = ?1 AND status = 'active'",
2405                rusqlite::params![&memory_id_q],
2406                |r| r.get::<_, String>(0),
2407            )
2408            .map(Some)
2409            .or_else(|e| match e {
2410                rusqlite::Error::QueryReturnedNoRows => Ok(None),
2411                other => Err(other),
2412            })
2413        })
2414        .await
2415        .map_err(ApiError::from)?;
2416
2417    let content = content.ok_or_else(|| {
2418        ApiError::not_found(format!(
2419            "node_id {node_id_full:?} (memory_id {memory_id}) not found in current tenant"
2420        ))
2421    })?;
2422
2423    // Pull one extra hit so we can drop self without losing user-requested
2424    // count. limit is already ≤ MAX_LIMIT; +1 stays within reason.
2425    let widened = (limit as usize).saturating_add(1).min(100);
2426    let result = solo_query::recall::run_recall_inner(
2427        tenant.embedder(),
2428        tenant.hnsw(),
2429        tenant.read(),
2430        &content,
2431        widened,
2432    )
2433    .await
2434    .map_err(ApiError::from)?;
2435
2436    let mut nodes = Vec::new();
2437    let mut edges = Vec::new();
2438    for hit in result.hits.into_iter() {
2439        if hit.memory_id == memory_id {
2440            // Skip self.
2441            continue;
2442        }
2443        if nodes.len() as i64 >= limit {
2444            break;
2445        }
2446        // The HNSW `cos_distance` is a distance (smaller = more similar).
2447        // Convert to a weight in [0, 1] (larger = more similar) for the
2448        // wire format: weight = (1 - distance).max(0).
2449        let weight = (1.0 - hit.cos_distance).max(0.0);
2450        let target_id = format!("ep:{}", hit.memory_id);
2451        edges.push(GraphEdge {
2452            id: edge_id(node_id_full, "semantic", &target_id),
2453            source: node_id_full.to_string(),
2454            target: target_id,
2455            kind: "semantic",
2456            predicate: None,
2457            weight: Some(weight),
2458        });
2459        nodes.push(GraphNode {
2460            id: format!("ep:{}", hit.memory_id),
2461            kind: NodeKind::Episode.as_wire_str(),
2462            label: episode_label(&hit.content),
2463            ts_ms: None,
2464            tenant_id: tenant_id.to_string(),
2465            preview: Some(truncate_preview(&hit.content, GRAPH_PREVIEW_CHARS)),
2466        });
2467    }
2468    Ok(GraphExpandResponse { nodes, edges })
2469}
2470
2471// ---- existence checks ----
2472
2473/// 404 if the memory_id has no row in this tenant's `episodes` table.
2474async fn ensure_episode_exists(
2475    tenant: &TenantHandle,
2476    memory_id: &str,
2477    node_id_full: &str,
2478) -> Result<(), ApiError> {
2479    let memory_id_q = memory_id.to_string();
2480    let exists: i64 = tenant
2481        .read()
2482        .interact(move |conn| {
2483            conn.query_row(
2484                "SELECT COUNT(*) FROM episodes WHERE memory_id = ?1",
2485                rusqlite::params![&memory_id_q],
2486                |r| r.get(0),
2487            )
2488        })
2489        .await
2490        .map_err(ApiError::from)?;
2491    if exists == 0 {
2492        return Err(ApiError::not_found(format!(
2493            "node_id {node_id_full:?} not found in current tenant"
2494        )));
2495    }
2496    Ok(())
2497}
2498
2499async fn ensure_cluster_exists(
2500    tenant: &TenantHandle,
2501    cluster_id: &str,
2502    node_id_full: &str,
2503) -> Result<(), ApiError> {
2504    let cluster_id_q = cluster_id.to_string();
2505    let exists: i64 = tenant
2506        .read()
2507        .interact(move |conn| {
2508            conn.query_row(
2509                "SELECT COUNT(*) FROM clusters WHERE cluster_id = ?1",
2510                rusqlite::params![&cluster_id_q],
2511                |r| r.get(0),
2512            )
2513        })
2514        .await
2515        .map_err(ApiError::from)?;
2516    if exists == 0 {
2517        return Err(ApiError::not_found(format!(
2518            "node_id {node_id_full:?} not found in current tenant"
2519        )));
2520    }
2521    Ok(())
2522}
2523
2524async fn ensure_document_exists(
2525    tenant: &TenantHandle,
2526    doc_id: &str,
2527    node_id_full: &str,
2528) -> Result<(), ApiError> {
2529    let doc_id_q = doc_id.to_string();
2530    let exists: i64 = tenant
2531        .read()
2532        .interact(move |conn| {
2533            conn.query_row(
2534                "SELECT COUNT(*) FROM documents WHERE doc_id = ?1",
2535                rusqlite::params![&doc_id_q],
2536                |r| r.get(0),
2537            )
2538        })
2539        .await
2540        .map_err(ApiError::from)?;
2541    if exists == 0 {
2542        return Err(ApiError::not_found(format!(
2543            "node_id {node_id_full:?} not found in current tenant"
2544        )));
2545    }
2546    Ok(())
2547}
2548
2549// ---------------------------------------------------------------------------
2550// Graph nodes + edges — paginated catalog reads (v0.10.0)
2551//
2552// `GET /v1/graph/nodes` and `GET /v1/graph/edges` are the bundle that
2553// powers solo-web's initial graph render. Both are read-only, both
2554// share the same tenant / auth / cursor scaffolding, both inherit the
2555// node-id prefix convention from `/v1/graph/expand` (ep:/doc:/chunk:/cl:/ent:).
2556//
2557// See `docs/dev-log/0114-graph-nodes-edges-impl.md` for the design
2558// notes (cursor format, entity scan strategy, semantic-edge rejection
2559// rationale, UNION pagination shape).
2560// ---------------------------------------------------------------------------
2561
// Page-size bounds for `/v1/graph/nodes` and `/v1/graph/edges`.
// NOTE(review): the handlers that consume these are not in view here —
// presumably an omitted `limit` falls back to the DEFAULT value and a
// supplied one is capped at MAX; confirm at the call sites.
const GRAPH_NODES_DEFAULT_LIMIT: u32 = 100;
const GRAPH_NODES_MAX_LIMIT: u32 = 1000;
const GRAPH_EDGES_DEFAULT_LIMIT: u32 = 200;
const GRAPH_EDGES_MAX_LIMIT: u32 = 2000;
// Upper bound on entities returned by the entity scan; reaching it is
// what `ENTITY_CAP_HEADER` reports.
const GRAPH_ENTITY_CAP: usize = 200;

/// Header set when the entity scan hit `GRAPH_ENTITY_CAP` and lower-
/// frequency entities were dropped from the response. Clients can show
/// "entities truncated" UX without parsing the body.
const ENTITY_CAP_HEADER: &str = "x-solo-entity-cap-reached";
2572
/// Query-string parameters for `GET /v1/graph/nodes`. Every field is
/// optional.
#[derive(Debug, Deserialize)]
struct GraphNodesQuery {
    /// Comma-separated kinds. Empty/missing = all five kinds. Repeated
    /// `?kind=` query params are NOT supported by axum's `Query<T>`
    /// extractor for `Option<String>` (it picks one) — comma-separated
    /// is documented + simpler. Values: episode|document|chunk|cluster|entity.
    #[serde(default)]
    kind: Option<String>,
    /// Inclusive lower bound on the node timestamp (the per-kind
    /// fetchers apply it as `>=`).
    #[serde(default)]
    since_ms: Option<i64>,
    /// Inclusive upper bound on the node timestamp (the per-kind
    /// fetchers apply it as `<=`).
    #[serde(default)]
    until_ms: Option<i64>,
    /// Requested page size. NOTE(review): presumably defaulted/capped via
    /// `GRAPH_NODES_DEFAULT_LIMIT` / `GRAPH_NODES_MAX_LIMIT` — confirm in
    /// the handler.
    #[serde(default)]
    limit: Option<u32>,
    /// Opaque pagination token from a previous response's `next_cursor`.
    /// NOTE(review): presumably decoded into a `NodesCursor` — confirm in
    /// the handler.
    #[serde(default)]
    cursor: Option<String>,
}
2590
/// Query-string parameters for `GET /v1/graph/edges`. Every field is
/// optional.
#[derive(Debug, Deserialize)]
struct GraphEdgesQuery {
    /// Prefixed node id (ep:/doc:/chunk:/cl:/ent:).
    /// NOTE(review): presumably restricts the result to edges touching
    /// this node — confirm in the handler.
    #[serde(default)]
    node_id: Option<String>,
    /// Comma-separated. Default = all kinds EXCEPT semantic.
    /// Values: triple|document_chunk|cluster_member|semantic.
    #[serde(default)]
    r#type: Option<String>,
    /// Requested page size. NOTE(review): presumably defaulted/capped via
    /// `GRAPH_EDGES_DEFAULT_LIMIT` / `GRAPH_EDGES_MAX_LIMIT` — confirm in
    /// the handler.
    #[serde(default)]
    limit: Option<u32>,
    /// Opaque pagination token from a previous response's `next_cursor`.
    /// NOTE(review): presumably decoded into an `EdgesCursor` — confirm
    /// in the handler.
    #[serde(default)]
    cursor: Option<String>,
}
2604
/// Response body for `GET /v1/graph/nodes`: one page of nodes.
#[derive(Debug, Serialize)]
struct GraphNodesResponse {
    /// Nodes on this page.
    nodes: Vec<GraphNode>,
    /// Token for the following page; the field is left out of the JSON
    /// entirely when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    next_cursor: Option<String>,
}
2611
/// Response body for `GET /v1/graph/edges`: one page of edges.
#[derive(Debug, Serialize)]
struct GraphEdgesResponse {
    /// Edges on this page.
    edges: Vec<GraphEdge>,
    /// Token for the following page; the field is left out of the JSON
    /// entirely when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    next_cursor: Option<String>,
}
2618
2619/// Decode the `kind` filter from the query string. Returns the set of
2620/// kinds the caller wants (all five when filter absent / empty). 400 on
2621/// unknown kind.
2622fn parse_node_kind_filter(raw: Option<&str>) -> Result<Vec<NodeKind>, ApiError> {
2623    let raw = raw.unwrap_or("").trim();
2624    if raw.is_empty() {
2625        return Ok(vec![
2626            NodeKind::Episode,
2627            NodeKind::Document,
2628            NodeKind::Chunk,
2629            NodeKind::Cluster,
2630            NodeKind::Entity,
2631        ]);
2632    }
2633    let mut out = Vec::new();
2634    for token in raw.split(',') {
2635        let token = token.trim();
2636        if token.is_empty() {
2637            continue;
2638        }
2639        let kind = match token {
2640            "episode" => NodeKind::Episode,
2641            "document" => NodeKind::Document,
2642            "chunk" => NodeKind::Chunk,
2643            "cluster" => NodeKind::Cluster,
2644            "entity" => NodeKind::Entity,
2645            other => {
2646                return Err(ApiError::bad_request(format!(
2647                    "unknown node kind {other:?}; expected one of episode/document/chunk/cluster/entity"
2648                )));
2649            }
2650        };
2651        if !out.contains(&kind) {
2652            out.push(kind);
2653        }
2654    }
2655    if out.is_empty() {
2656        return Err(ApiError::bad_request(
2657            "kind filter is empty after parsing; either omit or list at least one kind",
2658        ));
2659    }
2660    Ok(out)
2661}
2662
/// Edge-kind discriminator on `/v1/graph/edges`.
///
/// The explicit discriminants are the pagination rank reported by
/// `order_idx`, so they must stay stable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum EdgeKind {
    Triple = 0,
    DocumentChunk = 1,
    ClusterMember = 2,
}

impl EdgeKind {
    /// Sort-stable kind ordering for pagination. Lower runs first.
    fn order_idx(self) -> u8 {
        // Fieldless enum with explicit discriminants in 0..=2: the cast
        // is exact.
        self as u8
    }
}
2681
2682fn parse_edge_kind_filter(raw: Option<&str>) -> Result<Vec<EdgeKind>, ApiError> {
2683    let raw = raw.unwrap_or("").trim();
2684    if raw.is_empty() {
2685        // Default = all three concrete kinds; semantic is opt-in via
2686        // /v1/graph/neighbors/:id (per scoping doc §3 Decision B).
2687        return Ok(vec![
2688            EdgeKind::Triple,
2689            EdgeKind::DocumentChunk,
2690            EdgeKind::ClusterMember,
2691        ]);
2692    }
2693    let mut out = Vec::new();
2694    for token in raw.split(',') {
2695        let token = token.trim();
2696        if token.is_empty() {
2697            continue;
2698        }
2699        let kind = match token {
2700            "triple" => EdgeKind::Triple,
2701            "document_chunk" => EdgeKind::DocumentChunk,
2702            "cluster_member" => EdgeKind::ClusterMember,
2703            "semantic" => {
2704                // semantic edges aren't precomputed; they're HNSW queries
2705                // at request time. Wrong endpoint.
2706                return Err(ApiError::bad_request(
2707                    "semantic edges are available via /v1/graph/neighbors/:id?kind=semantic, not /v1/graph/edges (semantic edges aren't precomputed; they're query-time HNSW lookups)",
2708                ));
2709            }
2710            other => {
2711                return Err(ApiError::bad_request(format!(
2712                    "unknown edge type {other:?}; expected one of triple/document_chunk/cluster_member"
2713                )));
2714            }
2715        };
2716        if !out.contains(&kind) {
2717            out.push(kind);
2718        }
2719    }
2720    if out.is_empty() {
2721        return Err(ApiError::bad_request(
2722            "type filter is empty after parsing; either omit or list at least one type",
2723        ));
2724    }
2725    Ok(out)
2726}
2727
/// Opaque cursor for `/v1/graph/nodes`. Encodes the last item's
/// `(ts_ms, id)`. Under the sort `ts_ms DESC, id ASC` the next page
/// holds the rows that come strictly after it: `ts_ms < cursor.ts_ms`,
/// or `ts_ms == cursor.ts_ms` with `id > cursor.id` (see
/// `node_passes_cursor`).
#[derive(Debug, Serialize, Deserialize)]
struct NodesCursor {
    /// Timestamp of the last node on the previous page.
    ts_ms: i64,
    /// Id of the last node on the previous page; breaks ties between
    /// equal timestamps.
    id: String,
}
2736
/// Opaque cursor for `/v1/graph/edges`. Encodes the last item's
/// `(kind_idx, sub_id)` so the next page resumes at `> cursor` under
/// sort `(kind_idx ASC, sub_id ASC)`. `sub_id` is the per-kind stable
/// row id (triple_id for triples, chunk_id for document_chunk, the
/// composite `cluster_id||memory_id` string for cluster_member).
#[derive(Debug, Serialize, Deserialize)]
struct EdgesCursor {
    /// Rank of the last edge's kind. NOTE(review): presumably the value
    /// of `EdgeKind::order_idx` — confirm where the cursor is built.
    kind_idx: u8,
    /// Per-kind row id of the last edge on the previous page.
    sub_id: String,
}
2747
2748fn encode_cursor<T: Serialize>(value: &T) -> Result<String, ApiError> {
2749    use base64::Engine;
2750    let json = serde_json::to_vec(value).map_err(|e| {
2751        ApiError::internal(format!("cursor serialize: {e}"))
2752    })?;
2753    Ok(base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(json))
2754}
2755
2756fn decode_cursor<T: for<'de> Deserialize<'de>>(raw: &str) -> Result<T, ApiError> {
2757    use base64::Engine;
2758    let bytes = base64::engine::general_purpose::URL_SAFE_NO_PAD
2759        .decode(raw.as_bytes())
2760        .map_err(|e| ApiError::bad_request(format!("cursor: bad base64: {e}")))?;
2761    serde_json::from_slice::<T>(&bytes)
2762        .map_err(|e| ApiError::bad_request(format!("cursor: bad JSON payload: {e}")))
2763}
2764
/// Internal staging row for the nodes endpoint. Carries the GraphNode
/// plus the sort key so we can merge all kinds before applying the
/// pagination cut.
#[derive(Debug)]
struct StagingNode {
    /// The node as it will appear in the response.
    node: GraphNode,
    /// Timestamp half of the sort key (ordered descending).
    sort_ts_ms: i64,
    /// Id half of the sort key (ordered ascending, tie-break).
    /// NOTE(review): presumably the prefixed node id — confirm where
    /// staging rows are built.
    sort_id: String,
}
2774
/// Compares two `(ts_ms, id)` keys under the canonical node order:
/// newest timestamp first, then id ascending as a deterministic
/// tie-break.
fn cmp_node_sort_keys(a: (i64, &str), b: (i64, &str)) -> std::cmp::Ordering {
    let (a_ts, a_id) = a;
    let (b_ts, b_id) = b;
    // Operands are swapped for the timestamp to get descending order.
    b_ts.cmp(&a_ts).then_with(|| a_id.cmp(b_id))
}
2784
2785/// True if `(ts_ms, id)` strictly comes AFTER `cursor` under the canonical
2786/// sort `ts_ms DESC, id ASC` — i.e. is admissible into a page following
2787/// the cursor.
2788fn node_passes_cursor(ts_ms: i64, id: &str, cursor: &NodesCursor) -> bool {
2789    cmp_node_sort_keys((ts_ms, id), (cursor.ts_ms, cursor.id.as_str()))
2790        == std::cmp::Ordering::Greater
2791}
2792
2793// --- Per-kind row fetchers (each runs a bounded query, applies the time
2794//     filter, returns rows already sorted `ts_ms DESC, id ASC`).
2795
/// One `episodes` row as selected by `fetch_episodes_for_nodes`.
#[derive(Debug)]
struct NodeRowEp {
    /// `episodes.memory_id`.
    memory_id: String,
    /// `episodes.ts_ms`; the column the time filter and sort apply to.
    ts_ms: i64,
    /// `episodes.content`.
    content: String,
}
2802
2803fn fetch_episodes_for_nodes(
2804    conn: &rusqlite::Connection,
2805    since_ms: Option<i64>,
2806    until_ms: Option<i64>,
2807    cursor: Option<&NodesCursor>,
2808    limit: i64,
2809) -> rusqlite::Result<Vec<NodeRowEp>> {
2810    let mut sql = String::from(
2811        "SELECT memory_id, ts_ms, content
2812           FROM episodes
2813          WHERE status = 'active'",
2814    );
2815    let mut params: Vec<rusqlite::types::Value> = Vec::new();
2816    if let Some(s) = since_ms {
2817        sql.push_str(" AND ts_ms >= ?");
2818        params.push(s.into());
2819    }
2820    if let Some(u) = until_ms {
2821        sql.push_str(" AND ts_ms <= ?");
2822        params.push(u.into());
2823    }
2824    // Cursor pre-filter: under sort `ts_ms DESC, prefixed_id ASC`,
2825    // anything strictly newer than the cursor's ts_ms is in a previous
2826    // page; rows with equal ts_ms may or may not be (depends on the
2827    // cross-kind ordering). The post-merge step applies the full
2828    // `(ts_ms, prefixed_id)` comparison; here we just discard rows
2829    // that can't possibly survive.
2830    if let Some(cur) = cursor {
2831        sql.push_str(" AND ts_ms <= ?");
2832        params.push(cur.ts_ms.into());
2833    }
2834    sql.push_str(" ORDER BY ts_ms DESC, memory_id ASC LIMIT ?");
2835    params.push(limit.into());
2836    let mut stmt = conn.prepare(&sql)?;
2837    let rows: Vec<NodeRowEp> = stmt
2838        .query_map(rusqlite::params_from_iter(params), |r| {
2839            Ok(NodeRowEp {
2840                memory_id: r.get(0)?,
2841                ts_ms: r.get(1)?,
2842                content: r.get(2)?,
2843            })
2844        })?
2845        .collect::<rusqlite::Result<Vec<_>>>()?;
2846    Ok(rows)
2847}
2848
/// One row of the document query that feeds the graph node catalog.
#[derive(Debug)]
struct NodeRowDoc {
    /// `documents.doc_id`.
    doc_id: String,
    /// `documents.title`; nullable.
    title: Option<String>,
    /// `documents.source`; nullable.
    source: Option<String>,
    /// `documents.ingested_at_ms`; used as the node's sort timestamp.
    ingested_at_ms: i64,
}
2856
2857fn fetch_documents_for_nodes(
2858    conn: &rusqlite::Connection,
2859    since_ms: Option<i64>,
2860    until_ms: Option<i64>,
2861    cursor: Option<&NodesCursor>,
2862    limit: i64,
2863) -> rusqlite::Result<Vec<NodeRowDoc>> {
2864    let mut sql = String::from(
2865        "SELECT doc_id, title, source, ingested_at_ms
2866           FROM documents
2867          WHERE status = 'active'",
2868    );
2869    let mut params: Vec<rusqlite::types::Value> = Vec::new();
2870    if let Some(s) = since_ms {
2871        sql.push_str(" AND ingested_at_ms >= ?");
2872        params.push(s.into());
2873    }
2874    if let Some(u) = until_ms {
2875        sql.push_str(" AND ingested_at_ms <= ?");
2876        params.push(u.into());
2877    }
2878    if let Some(cur) = cursor {
2879        sql.push_str(" AND ingested_at_ms <= ?");
2880        params.push(cur.ts_ms.into());
2881    }
2882    sql.push_str(" ORDER BY ingested_at_ms DESC, doc_id ASC LIMIT ?");
2883    params.push(limit.into());
2884    let mut stmt = conn.prepare(&sql)?;
2885    let rows: Vec<NodeRowDoc> = stmt
2886        .query_map(rusqlite::params_from_iter(params), |r| {
2887            Ok(NodeRowDoc {
2888                doc_id: r.get(0)?,
2889                title: r.get(1)?,
2890                source: r.get(2)?,
2891                ingested_at_ms: r.get(3)?,
2892            })
2893        })?
2894        .collect::<rusqlite::Result<Vec<_>>>()?;
2895    Ok(rows)
2896}
2897
/// One row of the chunk query that feeds the graph node catalog.
#[derive(Debug)]
struct NodeRowChunk {
    /// `document_chunks.chunk_id`.
    chunk_id: String,
    /// `document_chunks.chunk_index`.
    chunk_index: i64,
    /// `document_chunks.content`, as stored.
    content: String,
    /// `document_chunks.created_at_ms`; used as the node's sort timestamp.
    created_at_ms: i64,
}
2905
2906fn fetch_chunks_for_nodes(
2907    conn: &rusqlite::Connection,
2908    since_ms: Option<i64>,
2909    until_ms: Option<i64>,
2910    cursor: Option<&NodesCursor>,
2911    limit: i64,
2912) -> rusqlite::Result<Vec<NodeRowChunk>> {
2913    // Filter by `document_chunks.created_at_ms`; chunks of forgotten
2914    // documents are filtered out by the join on `documents.status`.
2915    let mut sql = String::from(
2916        "SELECT c.chunk_id, c.chunk_index, c.content, c.created_at_ms
2917           FROM document_chunks c
2918           JOIN documents d ON d.doc_id = c.doc_id
2919          WHERE d.status = 'active'",
2920    );
2921    let mut params: Vec<rusqlite::types::Value> = Vec::new();
2922    if let Some(s) = since_ms {
2923        sql.push_str(" AND c.created_at_ms >= ?");
2924        params.push(s.into());
2925    }
2926    if let Some(u) = until_ms {
2927        sql.push_str(" AND c.created_at_ms <= ?");
2928        params.push(u.into());
2929    }
2930    if let Some(cur) = cursor {
2931        sql.push_str(" AND c.created_at_ms <= ?");
2932        params.push(cur.ts_ms.into());
2933    }
2934    sql.push_str(" ORDER BY c.created_at_ms DESC, c.chunk_id ASC LIMIT ?");
2935    params.push(limit.into());
2936    let mut stmt = conn.prepare(&sql)?;
2937    let rows: Vec<NodeRowChunk> = stmt
2938        .query_map(rusqlite::params_from_iter(params), |r| {
2939            Ok(NodeRowChunk {
2940                chunk_id: r.get(0)?,
2941                chunk_index: r.get(1)?,
2942                content: r.get(2)?,
2943                created_at_ms: r.get(3)?,
2944            })
2945        })?
2946        .collect::<rusqlite::Result<Vec<_>>>()?;
2947    Ok(rows)
2948}
2949
/// One row of the cluster query that feeds the graph node catalog.
#[derive(Debug)]
struct NodeRowCluster {
    /// `clusters.cluster_id`.
    cluster_id: String,
    /// `semantic_abstractions.content` for this cluster; `None` when the
    /// LEFT JOIN finds no abstraction row (or the column is NULL).
    abstraction: Option<String>,
    /// `clusters.created_at_ms`; used as the node's sort timestamp.
    created_at_ms: i64,
}
2956
2957fn fetch_clusters_for_nodes(
2958    conn: &rusqlite::Connection,
2959    since_ms: Option<i64>,
2960    until_ms: Option<i64>,
2961    cursor: Option<&NodesCursor>,
2962    limit: i64,
2963) -> rusqlite::Result<Vec<NodeRowCluster>> {
2964    // clusters has no `status` column; LEFT JOIN abstractions for the
2965    // optional label.
2966    let mut sql = String::from(
2967        "SELECT c.cluster_id, sa.content, c.created_at_ms
2968           FROM clusters c
2969           LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
2970          WHERE 1=1",
2971    );
2972    let mut params: Vec<rusqlite::types::Value> = Vec::new();
2973    if let Some(s) = since_ms {
2974        sql.push_str(" AND c.created_at_ms >= ?");
2975        params.push(s.into());
2976    }
2977    if let Some(u) = until_ms {
2978        sql.push_str(" AND c.created_at_ms <= ?");
2979        params.push(u.into());
2980    }
2981    if let Some(cur) = cursor {
2982        sql.push_str(" AND c.created_at_ms <= ?");
2983        params.push(cur.ts_ms.into());
2984    }
2985    sql.push_str(" ORDER BY c.created_at_ms DESC, c.cluster_id ASC LIMIT ?");
2986    params.push(limit.into());
2987    let mut stmt = conn.prepare(&sql)?;
2988    let rows: Vec<NodeRowCluster> = stmt
2989        .query_map(rusqlite::params_from_iter(params), |r| {
2990            Ok(NodeRowCluster {
2991                cluster_id: r.get(0)?,
2992                abstraction: r.get(1)?,
2993                created_at_ms: r.get(2)?,
2994            })
2995        })?
2996        .collect::<rusqlite::Result<Vec<_>>>()?;
2997    Ok(rows)
2998}
2999
/// One synthesized entity row, aggregated from the `triples` table.
#[derive(Debug)]
struct NodeRowEntity {
    /// The entity value: a `subject_id` or `object_id` seen in an active
    /// triple.
    value: String,
    /// Number of subject/object references to `value` among the rows
    /// that passed the time filter (`COUNT(*)`).
    ref_count: i64,
    /// Smallest `valid_from_ms` among those references (`MIN(ts_ms)`).
    first_seen_ms: i64,
}
3006
3007/// Synthesize entity nodes from the triples table. Caps result at
3008/// `GRAPH_ENTITY_CAP`, ordered by `ref_count DESC` so the loudest
3009/// entities make the cut. Returns (rows, cap_reached).
3010///
3011/// **Cost**: this is O(N) over active triples per request. For tenants
3012/// with >100k triples this can be noticeable; v0.10.x can cache the
3013/// rollup if profiling justifies it. The 200-row cap keeps the wire
3014/// payload bounded regardless.
3015fn fetch_entities_for_nodes(
3016    conn: &rusqlite::Connection,
3017    since_ms: Option<i64>,
3018    until_ms: Option<i64>,
3019    cursor: Option<&NodesCursor>,
3020) -> rusqlite::Result<(Vec<NodeRowEntity>, bool)> {
3021    // Pull subject + object columns, group by value, compute count + min
3022    // ts_ms. UNION ALL the two columns into a single aggregation. Apply
3023    // time filter against `valid_from_ms` (the closest analogue to "when
3024    // was this entity first referenced").
3025    let mut sql = String::from(
3026        "WITH all_refs AS (
3027            SELECT subject_id AS value, valid_from_ms AS ts_ms FROM triples WHERE status = 'active'
3028            UNION ALL
3029            SELECT object_id  AS value, valid_from_ms AS ts_ms FROM triples WHERE status = 'active'
3030         )
3031         SELECT value, COUNT(*) AS ref_count, MIN(ts_ms) AS first_seen_ms
3032           FROM all_refs
3033          WHERE 1=1",
3034    );
3035    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3036    if let Some(s) = since_ms {
3037        sql.push_str(" AND ts_ms >= ?");
3038        params.push(s.into());
3039    }
3040    if let Some(u) = until_ms {
3041        sql.push_str(" AND ts_ms <= ?");
3042        params.push(u.into());
3043    }
3044    // Cursor: drop entities whose first_seen_ms strictly newer than the
3045    // cursor. We can't predicate on COUNT() until after GROUP BY, so the
3046    // cap-applicable filter sits in the HAVING clause.
3047    sql.push_str(" GROUP BY value");
3048    if let Some(ts) = cursor.map(|c| c.ts_ms) {
3049        sql.push_str(" HAVING MIN(ts_ms) <= ?");
3050        params.push(ts.into());
3051    }
3052    // Over-fetch by one to detect "cap reached".
3053    let want = GRAPH_ENTITY_CAP as i64 + 1;
3054    sql.push_str(" ORDER BY ref_count DESC, value ASC LIMIT ?");
3055    params.push(want.into());
3056    let mut stmt = conn.prepare(&sql)?;
3057    let rows: Vec<NodeRowEntity> = stmt
3058        .query_map(rusqlite::params_from_iter(params), |r| {
3059            Ok(NodeRowEntity {
3060                value: r.get(0)?,
3061                ref_count: r.get(1)?,
3062                first_seen_ms: r.get(2)?,
3063            })
3064        })?
3065        .collect::<rusqlite::Result<Vec<_>>>()?;
3066    let cap_reached = rows.len() > GRAPH_ENTITY_CAP;
3067    let mut trimmed = rows;
3068    if cap_reached {
3069        trimmed.truncate(GRAPH_ENTITY_CAP);
3070    }
3071    Ok((trimmed, cap_reached))
3072}
3073
3074/// `GET /v1/graph/nodes`. Paginated node catalog across the tenant.
3075/// See module-level comments for the contract.
3076async fn graph_nodes_handler(
3077    TenantExtractor(tenant): TenantExtractor,
3078    Query(q): Query<GraphNodesQuery>,
3079) -> Result<Response, ApiError> {
3080    let limit = q.limit.unwrap_or(GRAPH_NODES_DEFAULT_LIMIT);
3081    let limit = limit.clamp(1, GRAPH_NODES_MAX_LIMIT);
3082    let kinds = parse_node_kind_filter(q.kind.as_deref())?;
3083    let since_ms = q.since_ms;
3084    let until_ms = q.until_ms;
3085    if let (Some(s), Some(u)) = (since_ms, until_ms) {
3086        if s > u {
3087            return Err(ApiError::bad_request(format!(
3088                "since_ms ({s}) must be <= until_ms ({u})"
3089            )));
3090        }
3091    }
3092    let cursor = match q.cursor.as_deref() {
3093        None => None,
3094        Some("") => None,
3095        Some(raw) => Some(decode_cursor::<NodesCursor>(raw)?),
3096    };
3097    let want_episode = kinds.contains(&NodeKind::Episode);
3098    let want_document = kinds.contains(&NodeKind::Document);
3099    let want_chunk = kinds.contains(&NodeKind::Chunk);
3100    let want_cluster = kinds.contains(&NodeKind::Cluster);
3101    let want_entity = kinds.contains(&NodeKind::Entity);
3102
3103    // Over-fetch `limit + 2` per kind:
3104    //   * `+1` so the merge step can detect "more rows available beyond
3105    //     this page" → emits a `next_cursor` instead of None.
3106    //   * `+1` again because the SQL pre-filter `ts_ms <= cursor.ts_ms`
3107    //     can pull the previous page's last item back in; the post-merge
3108    //     cursor predicate drops it, costing one row of headroom.
3109    // The entity cap stays at GRAPH_ENTITY_CAP — entities are bounded
3110    // independently by the response cap, not the page limit.
3111    let per_kind_limit = (limit as i64).saturating_add(2);
3112    let tenant_id_for_blocking = tenant.tenant_id().to_string();
3113    let cursor_clone = cursor.as_ref().map(|c| NodesCursor {
3114        ts_ms: c.ts_ms,
3115        id: c.id.clone(),
3116    });
3117
3118    let (mut staged, cap_reached) = tenant
3119        .read()
3120        .interact(move |conn| {
3121            let mut staged: Vec<StagingNode> = Vec::new();
3122            let mut cap_reached = false;
3123            let cursor_ref = cursor_clone.as_ref();
3124
3125            if want_episode {
3126                let eps = fetch_episodes_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3127                for ep in eps {
3128                    let id = format!("ep:{}", ep.memory_id);
3129                    let exp = ExpandedEpisode {
3130                        memory_id: ep.memory_id,
3131                        ts_ms: ep.ts_ms,
3132                        content: ep.content,
3133                    };
3134                    let node = graph_node_for_episode(&tenant_id_for_blocking, &exp);
3135                    staged.push(StagingNode {
3136                        sort_ts_ms: ep.ts_ms,
3137                        sort_id: id.clone(),
3138                        node,
3139                    });
3140                }
3141            }
3142            if want_document {
3143                let docs = fetch_documents_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3144                for d in docs {
3145                    let id = format!("doc:{}", d.doc_id);
3146                    let exp = ExpandedDocument {
3147                        doc_id: d.doc_id,
3148                        title: d.title,
3149                        source: d.source,
3150                        ingested_at_ms: d.ingested_at_ms,
3151                    };
3152                    let node = graph_node_for_document(&tenant_id_for_blocking, &exp);
3153                    staged.push(StagingNode {
3154                        sort_ts_ms: d.ingested_at_ms,
3155                        sort_id: id.clone(),
3156                        node,
3157                    });
3158                }
3159            }
3160            if want_chunk {
3161                let chunks = fetch_chunks_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3162                for c in chunks {
3163                    let id = format!("chunk:{}", c.chunk_id);
3164                    let exp = ExpandedChunk {
3165                        chunk_id: c.chunk_id,
3166                        chunk_index: c.chunk_index,
3167                        content: c.content,
3168                    };
3169                    // graph_node_for_chunk sets ts_ms = None for the
3170                    // wire format (chunks don't have a natural user-
3171                    // facing timestamp); but for sorting we use the
3172                    // row's created_at_ms.
3173                    let mut node = graph_node_for_chunk(&tenant_id_for_blocking, &exp);
3174                    node.ts_ms = Some(c.created_at_ms);
3175                    staged.push(StagingNode {
3176                        sort_ts_ms: c.created_at_ms,
3177                        sort_id: id.clone(),
3178                        node,
3179                    });
3180                }
3181            }
3182            if want_cluster {
3183                let cls = fetch_clusters_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3184                for c in cls {
3185                    let id = format!("cl:{}", c.cluster_id);
3186                    let node = graph_node_for_cluster(
3187                        &tenant_id_for_blocking,
3188                        &c.cluster_id,
3189                        c.abstraction.as_deref(),
3190                        c.created_at_ms,
3191                    );
3192                    staged.push(StagingNode {
3193                        sort_ts_ms: c.created_at_ms,
3194                        sort_id: id.clone(),
3195                        node,
3196                    });
3197                }
3198            }
3199            if want_entity {
3200                let (ents, was_cap_reached) =
3201                    fetch_entities_for_nodes(conn, since_ms, until_ms, cursor_ref)?;
3202                cap_reached = was_cap_reached;
3203                for e in ents {
3204                    let id = format!("ent:{}", e.value);
3205                    let mut node = graph_node_for_entity(&tenant_id_for_blocking, &e.value);
3206                    node.ts_ms = Some(e.first_seen_ms);
3207                    node.preview =
3208                        Some(format!("Referenced in {} triples", e.ref_count));
3209                    staged.push(StagingNode {
3210                        sort_ts_ms: e.first_seen_ms,
3211                        sort_id: id.clone(),
3212                        node,
3213                    });
3214                }
3215            }
3216            Ok::<_, rusqlite::Error>((staged, cap_reached))
3217        })
3218        .await
3219        .map_err(ApiError::from)?;
3220
3221    // Apply cursor filter.
3222    if let Some(cur) = &cursor {
3223        staged.retain(|s| node_passes_cursor(s.sort_ts_ms, &s.sort_id, cur));
3224    }
3225
3226    // Sort `ts_ms DESC, id ASC`.
3227    staged.sort_by(|a, b| {
3228        cmp_node_sort_keys((a.sort_ts_ms, &a.sort_id), (b.sort_ts_ms, &b.sort_id))
3229    });
3230
3231    // Apply page limit + compute next_cursor.
3232    let limit_us = limit as usize;
3233    let next_cursor = if staged.len() > limit_us {
3234        let last = &staged[limit_us - 1];
3235        Some(NodesCursor {
3236            ts_ms: last.sort_ts_ms,
3237            id: last.sort_id.clone(),
3238        })
3239    } else {
3240        None
3241    };
3242    staged.truncate(limit_us);
3243
3244    let next_cursor_str = match next_cursor {
3245        Some(c) => Some(encode_cursor(&c)?),
3246        None => None,
3247    };
3248
3249    let nodes: Vec<GraphNode> = staged.into_iter().map(|s| s.node).collect();
3250    let payload = GraphNodesResponse {
3251        nodes,
3252        next_cursor: next_cursor_str,
3253    };
3254
3255    // Attach the entity-cap header so clients can show truncation UX
3256    // without parsing the body.
3257    let mut response = Json(payload).into_response();
3258    if cap_reached {
3259        response
3260            .headers_mut()
3261            .insert(ENTITY_CAP_HEADER, HeaderValue::from_static("true"));
3262    }
3263    Ok(response)
3264}
3265
3266// --- /v1/graph/edges --------------------------------------------------
3267
/// Internal staging row for the edges endpoint: the wire-format edge
/// plus the `(kind_idx, sub_id)` key used for sorting and cursoring.
#[derive(Debug)]
struct StagingEdge {
    /// Wire-format edge returned to the client.
    edge: GraphEdge,
    /// Primary sort key (ascending); the edges handler fills it from
    /// `EdgeKind::order_idx()`.
    kind_idx: u8,
    /// Secondary sort key (ascending) within one edge kind. The edges
    /// handler uses the triple id, the chunk id, or
    /// `cluster_id` + U+001F + `memory_id` depending on the kind.
    sub_id: String,
}
3274
/// Comparator for the edge catalog's order: `kind_idx` ascending, then
/// `sub_id` ascending (byte-wise string comparison).
fn cmp_edge_sort_keys(a: (u8, &str), b: (u8, &str)) -> std::cmp::Ordering {
    // Tuples compare lexicographically, field by field, which is exactly
    // "kind first, sub-id as tie-break".
    a.cmp(&b)
}
3281
3282fn edge_passes_cursor(kind_idx: u8, sub_id: &str, cursor: &EdgesCursor) -> bool {
3283    cmp_edge_sort_keys((kind_idx, sub_id), (cursor.kind_idx, cursor.sub_id.as_str()))
3284        == std::cmp::Ordering::Greater
3285}
3286
3287/// Whether the supplied focus `node_id` (kind, value) matches an edge's
3288/// (source, target) endpoint pair under a given edge kind. Used to
3289/// filter `?node_id=...` queries.
3290fn edge_touches_focus(
3291    kind: EdgeKind,
3292    focus_kind: NodeKind,
3293    focus_value: &str,
3294    src_value: &str,
3295    tgt_value: &str,
3296    extra_value: Option<&str>,
3297) -> bool {
3298    // Determine which endpoint kinds this edge family produces; if the
3299    // focus kind isn't compatible, no match.
3300    match kind {
3301        EdgeKind::Triple => match focus_kind {
3302            // Triple edges flow source_episode → ent:<object_id>. We
3303            // also expose subject/object entities as endpoints (see
3304            // emit_triple_edges_for_focus); the matching here covers
3305            // episode focus + entity focus + the symmetric pair.
3306            NodeKind::Episode => src_value == focus_value,
3307            NodeKind::Entity => {
3308                tgt_value == focus_value
3309                    || extra_value.map(|x| x == focus_value).unwrap_or(false)
3310                    || src_value == focus_value
3311            }
3312            _ => false,
3313        },
3314        EdgeKind::DocumentChunk => match focus_kind {
3315            NodeKind::Document => src_value == focus_value,
3316            NodeKind::Chunk => tgt_value == focus_value,
3317            _ => false,
3318        },
3319        EdgeKind::ClusterMember => match focus_kind {
3320            NodeKind::Cluster => src_value == focus_value,
3321            NodeKind::Episode => tgt_value == focus_value,
3322            _ => false,
3323        },
3324    }
3325}
3326
/// One row of the triple query that feeds the graph edge catalog.
#[derive(Debug)]
struct EdgeRowTriple {
    /// `triples.triple_id`.
    triple_id: String,
    /// `episodes.memory_id` of the triple's source episode; `None` when
    /// no source episode row was joined.
    source_memory_id: Option<String>,
    /// `triples.object_id`.
    object_id: String,
    /// `triples.predicate`.
    predicate: String,
    /// `triples.confidence`.
    confidence: f32,
}
3335
3336fn fetch_triple_edges(conn: &rusqlite::Connection) -> rusqlite::Result<Vec<EdgeRowTriple>> {
3337    // Emit one edge per triple: source_episode → ent:object_id. Skip
3338    // orphan triples (`source_episode_id IS NULL`). Bound the scan at
3339    // GRAPH_EDGES_MAX_LIMIT * a safety multiplier so a runaway tenant
3340    // doesn't OOM the page-builder; the merge-and-page step trims to
3341    // the real limit downstream.
3342    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
3343    let mut stmt = conn.prepare(
3344        "SELECT t.triple_id, e.memory_id, t.object_id, t.predicate, t.confidence
3345           FROM triples t
3346           LEFT JOIN episodes e ON e.rowid = t.source_episode_id
3347          WHERE t.status = 'active'
3348          ORDER BY t.triple_id ASC
3349          LIMIT ?1",
3350    )?;
3351    let rows: Vec<EdgeRowTriple> = stmt
3352        .query_map(rusqlite::params![safety_cap], |r| {
3353            Ok(EdgeRowTriple {
3354                triple_id: r.get(0)?,
3355                source_memory_id: r.get::<_, Option<String>>(1)?,
3356                object_id: r.get(2)?,
3357                predicate: r.get(3)?,
3358                confidence: r.get(4)?,
3359            })
3360        })?
3361        .collect::<rusqlite::Result<Vec<_>>>()?;
3362    Ok(rows)
3363}
3364
/// One row of the document-chunk query that feeds the graph edge
/// catalog; each row becomes one document -> chunk edge.
#[derive(Debug)]
struct EdgeRowDocChunk {
    /// `document_chunks.chunk_id` (the edge target).
    chunk_id: String,
    /// `document_chunks.doc_id` (the edge source).
    doc_id: String,
}
3370
3371fn fetch_document_chunk_edges(
3372    conn: &rusqlite::Connection,
3373) -> rusqlite::Result<Vec<EdgeRowDocChunk>> {
3374    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
3375    let mut stmt = conn.prepare(
3376        "SELECT c.chunk_id, c.doc_id
3377           FROM document_chunks c
3378           JOIN documents d ON d.doc_id = c.doc_id
3379          WHERE d.status = 'active'
3380          ORDER BY c.chunk_id ASC
3381          LIMIT ?1",
3382    )?;
3383    let rows: Vec<EdgeRowDocChunk> = stmt
3384        .query_map(rusqlite::params![safety_cap], |r| {
3385            Ok(EdgeRowDocChunk {
3386                chunk_id: r.get(0)?,
3387                doc_id: r.get(1)?,
3388            })
3389        })?
3390        .collect::<rusqlite::Result<Vec<_>>>()?;
3391    Ok(rows)
3392}
3393
/// One row of the cluster-membership query that feeds the graph edge
/// catalog; each row becomes one cluster -> episode edge.
#[derive(Debug)]
struct EdgeRowClusterMember {
    /// `cluster_episodes.cluster_id` (the edge source).
    cluster_id: String,
    /// `cluster_episodes.memory_id` (the edge target).
    memory_id: String,
}
3399
3400fn fetch_cluster_member_edges(
3401    conn: &rusqlite::Connection,
3402) -> rusqlite::Result<Vec<EdgeRowClusterMember>> {
3403    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
3404    let mut stmt = conn.prepare(
3405        "SELECT ce.cluster_id, ce.memory_id
3406           FROM cluster_episodes ce
3407           JOIN episodes e ON e.memory_id = ce.memory_id
3408          WHERE e.status = 'active'
3409          ORDER BY ce.cluster_id ASC, ce.memory_id ASC
3410          LIMIT ?1",
3411    )?;
3412    let rows: Vec<EdgeRowClusterMember> = stmt
3413        .query_map(rusqlite::params![safety_cap], |r| {
3414            Ok(EdgeRowClusterMember {
3415                cluster_id: r.get(0)?,
3416                memory_id: r.get(1)?,
3417            })
3418        })?
3419        .collect::<rusqlite::Result<Vec<_>>>()?;
3420    Ok(rows)
3421}
3422
3423/// `GET /v1/graph/edges`. Paginated edge catalog. See module-level
3424/// comments for the contract.
3425async fn graph_edges_handler(
3426    TenantExtractor(tenant): TenantExtractor,
3427    Query(q): Query<GraphEdgesQuery>,
3428) -> Result<Json<GraphEdgesResponse>, ApiError> {
3429    let limit = q.limit.unwrap_or(GRAPH_EDGES_DEFAULT_LIMIT);
3430    let limit = limit.clamp(1, GRAPH_EDGES_MAX_LIMIT);
3431    let kinds = parse_edge_kind_filter(q.r#type.as_deref())?;
3432    let cursor = match q.cursor.as_deref() {
3433        None => None,
3434        Some("") => None,
3435        Some(raw) => Some(decode_cursor::<EdgesCursor>(raw)?),
3436    };
3437
3438    let focus = match q.node_id.as_deref() {
3439        None => None,
3440        Some(raw) => {
3441            let (kind, value) = parse_node_id(raw)?;
3442            Some((kind, value.to_string()))
3443        }
3444    };
3445
3446    let want_triple = kinds.contains(&EdgeKind::Triple);
3447    let want_doc_chunk = kinds.contains(&EdgeKind::DocumentChunk);
3448    let want_cluster_member = kinds.contains(&EdgeKind::ClusterMember);
3449
3450    let staged: Vec<StagingEdge> = tenant
3451        .read()
3452        .interact(move |conn| {
3453            let mut staged: Vec<StagingEdge> = Vec::new();
3454
3455            if want_triple {
3456                for t in fetch_triple_edges(conn)? {
3457                    let src_id = match &t.source_memory_id {
3458                        Some(mid) => format!("ep:{mid}"),
3459                        None => continue, // orphan triple — skip
3460                    };
3461                    let tgt_id = format!("ent:{}", t.object_id);
3462                    if let Some((fk, fv)) = &focus {
3463                        // `src_value` for matching is the bare memory_id
3464                        // (after the `ep:` prefix); `tgt_value` is the
3465                        // bare entity value.
3466                        if !edge_touches_focus(
3467                            EdgeKind::Triple,
3468                            *fk,
3469                            fv,
3470                            t.source_memory_id
3471                                .as_deref()
3472                                .unwrap_or(""),
3473                            &t.object_id,
3474                            // Triples carry a subject_id too, but the
3475                            // emitted edge only goes ep → ent(object).
3476                            // For entity-focus matches we also accept
3477                            // hits on subject_id; surface it through
3478                            // the `extra` slot.
3479                            None,
3480                        ) {
3481                            continue;
3482                        }
3483                    }
3484                    let edge = GraphEdge {
3485                        id: edge_id(&src_id, "triple", &tgt_id),
3486                        source: src_id,
3487                        target: tgt_id,
3488                        kind: "triple",
3489                        predicate: Some(t.predicate),
3490                        weight: Some(t.confidence),
3491                    };
3492                    staged.push(StagingEdge {
3493                        edge,
3494                        kind_idx: EdgeKind::Triple.order_idx(),
3495                        sub_id: t.triple_id,
3496                    });
3497                }
3498            }
3499            if want_doc_chunk {
3500                for dc in fetch_document_chunk_edges(conn)? {
3501                    let src_id = format!("doc:{}", dc.doc_id);
3502                    let tgt_id = format!("chunk:{}", dc.chunk_id);
3503                    if let Some((fk, fv)) = &focus {
3504                        if !edge_touches_focus(
3505                            EdgeKind::DocumentChunk,
3506                            *fk,
3507                            fv,
3508                            &dc.doc_id,
3509                            &dc.chunk_id,
3510                            None,
3511                        ) {
3512                            continue;
3513                        }
3514                    }
3515                    let edge = GraphEdge {
3516                        id: edge_id(&src_id, "document_chunk", &tgt_id),
3517                        source: src_id,
3518                        target: tgt_id,
3519                        kind: "document_chunk",
3520                        predicate: None,
3521                        weight: None,
3522                    };
3523                    staged.push(StagingEdge {
3524                        edge,
3525                        kind_idx: EdgeKind::DocumentChunk.order_idx(),
3526                        sub_id: dc.chunk_id,
3527                    });
3528                }
3529            }
3530            if want_cluster_member {
3531                for cm in fetch_cluster_member_edges(conn)? {
3532                    let src_id = format!("cl:{}", cm.cluster_id);
3533                    let tgt_id = format!("ep:{}", cm.memory_id);
3534                    if let Some((fk, fv)) = &focus {
3535                        if !edge_touches_focus(
3536                            EdgeKind::ClusterMember,
3537                            *fk,
3538                            fv,
3539                            &cm.cluster_id,
3540                            &cm.memory_id,
3541                            None,
3542                        ) {
3543                            continue;
3544                        }
3545                    }
3546                    let edge = GraphEdge {
3547                        id: edge_id(&src_id, "cluster_member", &tgt_id),
3548                        source: src_id,
3549                        target: tgt_id,
3550                        kind: "cluster_member",
3551                        predicate: None,
3552                        weight: None,
3553                    };
3554                    let sub_id = format!("{}\u{1f}{}", cm.cluster_id, cm.memory_id);
3555                    staged.push(StagingEdge {
3556                        edge,
3557                        kind_idx: EdgeKind::ClusterMember.order_idx(),
3558                        sub_id,
3559                    });
3560                }
3561            }
3562            Ok::<_, rusqlite::Error>(staged)
3563        })
3564        .await
3565        .map_err(ApiError::from)?;
3566
3567    // Apply cursor filter.
3568    let mut staged = staged;
3569    if let Some(cur) = &cursor {
3570        staged.retain(|s| edge_passes_cursor(s.kind_idx, &s.sub_id, cur));
3571    }
3572
3573    // Sort `(kind_idx ASC, sub_id ASC)` — stable, simple.
3574    staged.sort_by(|a, b| {
3575        cmp_edge_sort_keys((a.kind_idx, &a.sub_id), (b.kind_idx, &b.sub_id))
3576    });
3577
3578    let limit_us = limit as usize;
3579    let next_cursor = if staged.len() > limit_us {
3580        let last = &staged[limit_us - 1];
3581        Some(EdgesCursor {
3582            kind_idx: last.kind_idx,
3583            sub_id: last.sub_id.clone(),
3584        })
3585    } else {
3586        None
3587    };
3588    staged.truncate(limit_us);
3589    let next_cursor_str = match next_cursor {
3590        Some(c) => Some(encode_cursor(&c)?),
3591        None => None,
3592    };
3593
3594    let edges: Vec<GraphEdge> = staged.into_iter().map(|s| s.edge).collect();
3595    Ok(Json(GraphEdgesResponse {
3596        edges,
3597        next_cursor: next_cursor_str,
3598    }))
3599}
3600
3601// ---------------------------------------------------------------------------
3602// Graph inspect — kind-discriminated full-record drill (v0.10.0)
3603//
3604// `GET /v1/graph/inspect/{id}` powers solo-web's right-side inspector
3605// panel. Path `id` carries the prefixed node identifier (ep:/doc:/chunk:/
3606// cl:/ent:); the handler dispatches per-kind and returns the same wire
3607// shape solo-web's `InspectResponse` expects: `{ node, full_text?,
3608// triples_in[], triples_out[] }`.
3609//
3610// Per-kind contract (v0.10.0 P1):
3611//   * `ep:<memory_id>`     full_text = episodes.content (untruncated),
3612//                          triples_in = [],
3613//                          triples_out = triples WHERE source_episode_id = rowid
3614//                          (one edge per triple, ep -> ent(object), predicate
3615//                          + weight surfaced). Episodes never appear as triple
3616//                          subjects/objects, so triples_in is structurally
3617//                          empty.
3618//   * `doc:<doc_id>`       full_text = concatenated chunk bodies separated by
3619//                          "\n\n" (no `documents.full_text` column exists; the
3620//                          chunks-concat path produces the same final text the
3621//                          ingester chunked from). triples_in/out = [] --
3622//                          documents don't directly carry triples; their
3623//                          chunks transitively do, but the inspector reaches
3624//                          those via the existing `/v1/graph/expand` drill.
3625//   * `chunk:<chunk_id>`   full_text = document_chunks.content,
3626//                          triples_in/out = [] (chunks aren't triple endpoints).
3627//   * `cl:<cluster_id>`    full_text = label + "\n\n" + abstraction
3628//                          (`semantic_abstractions.content`) when an
3629//                          abstraction exists; just the label otherwise.
3630//                          triples_in/out = [].
3631//   * `ent:<value>`        full_text = None (entities have no body),
3632//                          triples_in = [],
3633//                          triples_out = all triples where the entity appears
3634//                          as subject OR object. Capped at
3635//                          `GRAPH_INSPECT_ENTITY_TRIPLES_CAP` (50). Entities
3636//                          are synthetic -- an `ent:<value>` with zero triples
3637//                          in the tenant returns 404 (the entity exists only
3638//                          if at least one triple references it).
3639//
3640// Error semantics: 404 if the prefixed id has no row in the tenant's DB.
3641// 400 if the prefix is unknown or the body after `:` is empty (reuses
3642// `parse_node_id`). Tenant + auth are handled by the existing extractors.
3643//
3644// Lesson #30: no audit emit. Inspect is a derived read over already-
3645// audited primitives.
3646// ---------------------------------------------------------------------------
3647
/// Upper bound on the number of triples an entity inspect returns.
///
/// Heavily referenced entities ("user", "Alice") can appear in a large
/// number of triples, while the inspector panel only needs a sample for
/// orientation. When the UI needs the full, paginated set it goes through
/// `/v1/graph/expand?kind=triple` instead.
const GRAPH_INSPECT_ENTITY_TRIPLES_CAP: i64 = 50;
3653
3654#[derive(Debug, Serialize)]
3655struct GraphInspectResponse {
3656    node: GraphNode,
3657    #[serde(skip_serializing_if = "Option::is_none")]
3658    full_text: Option<String>,
3659    triples_in: Vec<GraphEdge>,
3660    triples_out: Vec<GraphEdge>,
3661}
3662
3663/// `GET /v1/graph/inspect/{id}`. See module-level comments.
3664async fn graph_inspect_handler(
3665    TenantExtractor(tenant): TenantExtractor,
3666    Path(id): Path<String>,
3667) -> Result<Json<GraphInspectResponse>, ApiError> {
3668    let (kind, value) = parse_node_id(&id)?;
3669    let tenant_id_str = tenant.tenant_id().to_string();
3670    let value = value.to_string();
3671    let node_id_full = id;
3672    match kind {
3673        NodeKind::Episode => {
3674            inspect_episode_node(&tenant, &tenant_id_str, value, node_id_full).await
3675        }
3676        NodeKind::Document => {
3677            inspect_document_node(&tenant, &tenant_id_str, value, node_id_full).await
3678        }
3679        NodeKind::Chunk => {
3680            inspect_chunk_node(&tenant, &tenant_id_str, value, node_id_full).await
3681        }
3682        NodeKind::Cluster => {
3683            inspect_cluster_node(&tenant, &tenant_id_str, value, node_id_full).await
3684        }
3685        NodeKind::Entity => {
3686            inspect_entity_node(&tenant, &tenant_id_str, value, node_id_full).await
3687        }
3688    }
3689    .map(Json)
3690}
3691
3692// ---- per-kind paths ----
3693
3694async fn inspect_episode_node(
3695    tenant: &TenantHandle,
3696    tenant_id: &str,
3697    memory_id: String,
3698    node_id_full: String,
3699) -> Result<GraphInspectResponse, ApiError> {
3700    let memory_id_for_err = memory_id.clone();
3701    let memory_id_q = memory_id.clone();
3702    // Fetch the episode row + all triples sourced from it in one
3703    // interact() call to keep the connection check-out short.
3704    let fetched: Option<(ExpandedEpisode, Vec<TripleRow>)> = tenant
3705        .read()
3706        .interact(move |conn| {
3707            let ep_row: Option<(i64, i64, String)> = conn
3708                .query_row(
3709                    "SELECT rowid, ts_ms, content
3710                       FROM episodes
3711                      WHERE memory_id = ?1
3712                        AND status = 'active'",
3713                    rusqlite::params![&memory_id_q],
3714                    |r| {
3715                        Ok((
3716                            r.get::<_, i64>(0)?,
3717                            r.get::<_, i64>(1)?,
3718                            r.get::<_, String>(2)?,
3719                        ))
3720                    },
3721                )
3722                .map(Some)
3723                .or_else(|e| match e {
3724                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
3725                    other => Err(other),
3726                })?;
3727            let Some((rowid, ts_ms, content)) = ep_row else {
3728                return Ok(None);
3729            };
3730            let mut stmt = conn.prepare(
3731                "SELECT subject_id, predicate, object_id, confidence
3732                   FROM triples
3733                  WHERE source_episode_id = ?1
3734                    AND status = 'active'
3735                  ORDER BY valid_from_ms DESC",
3736            )?;
3737            let triples = stmt
3738                .query_map(rusqlite::params![rowid], |r| {
3739                    Ok(TripleRow {
3740                        subject_id: r.get(0)?,
3741                        predicate: r.get(1)?,
3742                        object_id: r.get(2)?,
3743                        confidence: r.get(3)?,
3744                    })
3745                })?
3746                .collect::<rusqlite::Result<Vec<_>>>()?;
3747            let ep = ExpandedEpisode {
3748                memory_id: memory_id_q,
3749                ts_ms,
3750                content,
3751            };
3752            Ok::<_, rusqlite::Error>(Some((ep, triples)))
3753        })
3754        .await
3755        .map_err(ApiError::from)?;
3756
3757    let (ep, triples) = fetched.ok_or_else(|| {
3758        ApiError::not_found(format!(
3759            "node_id {node_id_full:?} (memory_id {memory_id_for_err}) not found in current tenant"
3760        ))
3761    })?;
3762
3763    let node = graph_node_for_episode(tenant_id, &ep);
3764    let full_text = Some(ep.content.clone());
3765    // Triples flow from this episode (the source) to entity endpoints.
3766    // Emit one edge per triple: ep -> ent(object), predicate from the
3767    // triple, weight = confidence. This mirrors the `/v1/graph/edges`
3768    // triple-edge convention so the renderer can dedupe via composite id.
3769    let mut triples_out = Vec::with_capacity(triples.len());
3770    for t in triples {
3771        let tgt_id = format!("ent:{}", t.object_id);
3772        triples_out.push(GraphEdge {
3773            id: edge_id(&node_id_full, "triple", &tgt_id),
3774            source: node_id_full.clone(),
3775            target: tgt_id,
3776            kind: "triple",
3777            predicate: Some(t.predicate),
3778            weight: Some(t.confidence),
3779        });
3780    }
3781    Ok(GraphInspectResponse {
3782        node,
3783        full_text,
3784        triples_in: Vec::new(),
3785        triples_out,
3786    })
3787}
3788
3789async fn inspect_document_node(
3790    tenant: &TenantHandle,
3791    tenant_id: &str,
3792    doc_id: String,
3793    node_id_full: String,
3794) -> Result<GraphInspectResponse, ApiError> {
3795    let doc_id_for_err = doc_id.clone();
3796    let doc_id_q = doc_id.clone();
3797    // Fetch the document row + all chunk bodies (ORDER BY chunk_index) in
3798    // one interact() call. The chunks-concat path is the source of full_text
3799    // since the `documents` table doesn't carry the original raw text. For
3800    // v0.10.0 P1 we concatenate every chunk; pagination is the inspector
3801    // panel's responsibility if the document is very large.
3802    let fetched: Option<(ExpandedDocument, Vec<String>)> = tenant
3803        .read()
3804        .interact(move |conn| {
3805            let doc_row: Option<ExpandedDocument> = conn
3806                .query_row(
3807                    "SELECT doc_id, title, source, ingested_at_ms
3808                       FROM documents
3809                      WHERE doc_id = ?1
3810                        AND status = 'active'",
3811                    rusqlite::params![&doc_id_q],
3812                    |r| {
3813                        Ok(ExpandedDocument {
3814                            doc_id: r.get(0)?,
3815                            title: r.get(1)?,
3816                            source: r.get(2)?,
3817                            ingested_at_ms: r.get(3)?,
3818                        })
3819                    },
3820                )
3821                .map(Some)
3822                .or_else(|e| match e {
3823                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
3824                    other => Err(other),
3825                })?;
3826            let Some(doc) = doc_row else {
3827                return Ok(None);
3828            };
3829            let mut stmt = conn.prepare(
3830                "SELECT content
3831                   FROM document_chunks
3832                  WHERE doc_id = ?1
3833                  ORDER BY chunk_index ASC",
3834            )?;
3835            let chunks = stmt
3836                .query_map(rusqlite::params![&doc_id_q], |r| r.get::<_, String>(0))?
3837                .collect::<rusqlite::Result<Vec<_>>>()?;
3838            Ok::<_, rusqlite::Error>(Some((doc, chunks)))
3839        })
3840        .await
3841        .map_err(ApiError::from)?;
3842
3843    let (doc, chunks) = fetched.ok_or_else(|| {
3844        ApiError::not_found(format!(
3845            "node_id {node_id_full:?} (doc_id {doc_id_for_err}) not found in current tenant"
3846        ))
3847    })?;
3848
3849    let full_text = if chunks.is_empty() {
3850        // Document with zero chunks (e.g. mid-ingest, or an empty source).
3851        // Return None to signal "no body available" rather than an empty
3852        // string -- saves the renderer a degenerate code path.
3853        None
3854    } else {
3855        Some(chunks.join("\n\n"))
3856    };
3857
3858    Ok(GraphInspectResponse {
3859        node: graph_node_for_document(tenant_id, &doc),
3860        full_text,
3861        triples_in: Vec::new(),
3862        triples_out: Vec::new(),
3863    })
3864}
3865
3866async fn inspect_chunk_node(
3867    tenant: &TenantHandle,
3868    tenant_id: &str,
3869    chunk_id: String,
3870    node_id_full: String,
3871) -> Result<GraphInspectResponse, ApiError> {
3872    let chunk_id_for_err = chunk_id.clone();
3873    let chunk_id_q = chunk_id.clone();
3874    let row: Option<(ExpandedChunk, i64)> = tenant
3875        .read()
3876        .interact(move |conn| {
3877            conn.query_row(
3878                "SELECT c.chunk_id, c.chunk_index, c.content, c.created_at_ms
3879                   FROM document_chunks c
3880                   JOIN documents d ON d.doc_id = c.doc_id
3881                  WHERE c.chunk_id = ?1
3882                    AND d.status = 'active'",
3883                rusqlite::params![&chunk_id_q],
3884                |r| {
3885                    Ok((
3886                        ExpandedChunk {
3887                            chunk_id: r.get(0)?,
3888                            chunk_index: r.get(1)?,
3889                            content: r.get(2)?,
3890                        },
3891                        r.get::<_, i64>(3)?,
3892                    ))
3893                },
3894            )
3895            .map(Some)
3896            .or_else(|e| match e {
3897                rusqlite::Error::QueryReturnedNoRows => Ok(None),
3898                other => Err(other),
3899            })
3900        })
3901        .await
3902        .map_err(ApiError::from)?;
3903
3904    let (chunk, created_at_ms) = row.ok_or_else(|| {
3905        ApiError::not_found(format!(
3906            "node_id {node_id_full:?} (chunk_id {chunk_id_for_err}) not found in current tenant"
3907        ))
3908    })?;
3909
3910    let full_text = Some(chunk.content.clone());
3911    let mut node = graph_node_for_chunk(tenant_id, &chunk);
3912    // Mirror the `/v1/graph/nodes` chunk-row behaviour: surface
3913    // `created_at_ms` so the inspector panel has a sortable timestamp.
3914    node.ts_ms = Some(created_at_ms);
3915
3916    Ok(GraphInspectResponse {
3917        node,
3918        full_text,
3919        triples_in: Vec::new(),
3920        triples_out: Vec::new(),
3921    })
3922}
3923
3924async fn inspect_cluster_node(
3925    tenant: &TenantHandle,
3926    tenant_id: &str,
3927    cluster_id: String,
3928    node_id_full: String,
3929) -> Result<GraphInspectResponse, ApiError> {
3930    let cluster_id_for_err = cluster_id.clone();
3931    let cluster_id_q = cluster_id.clone();
3932    let row: Option<(Option<String>, i64)> = tenant
3933        .read()
3934        .interact(move |conn| {
3935            conn.query_row(
3936                "SELECT sa.content, c.created_at_ms
3937                   FROM clusters c
3938                   LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
3939                  WHERE c.cluster_id = ?1",
3940                rusqlite::params![&cluster_id_q],
3941                |r| Ok((r.get::<_, Option<String>>(0)?, r.get::<_, i64>(1)?)),
3942            )
3943            .map(Some)
3944            .or_else(|e| match e {
3945                rusqlite::Error::QueryReturnedNoRows => Ok(None),
3946                other => Err(other),
3947            })
3948        })
3949        .await
3950        .map_err(ApiError::from)?;
3951
3952    let (abstraction, created_at_ms) = row.ok_or_else(|| {
3953        ApiError::not_found(format!(
3954            "node_id {node_id_full:?} (cluster_id {cluster_id_for_err}) not found in current tenant"
3955        ))
3956    })?;
3957
3958    // full_text is "<cluster_id label>\n\n<abstraction>" when an abstraction
3959    // exists; just the label otherwise. Brief "cluster" -- the cluster
3960    // label is `clusters.cluster_id` (the user-facing label is the
3961    // abstraction; clusters don't have a `label` column).
3962    let full_text = match abstraction.as_deref() {
3963        Some(a) => Some(format!("cluster {cluster_id_for_err}\n\n{a}")),
3964        None => Some(format!("cluster {cluster_id_for_err}")),
3965    };
3966
3967    Ok(GraphInspectResponse {
3968        node: graph_node_for_cluster(
3969            tenant_id,
3970            &cluster_id_for_err,
3971            abstraction.as_deref(),
3972            created_at_ms,
3973        ),
3974        full_text,
3975        triples_in: Vec::new(),
3976        triples_out: Vec::new(),
3977    })
3978}
3979
3980async fn inspect_entity_node(
3981    tenant: &TenantHandle,
3982    tenant_id: &str,
3983    entity_value: String,
3984    node_id_full: String,
3985) -> Result<GraphInspectResponse, ApiError> {
3986    // Entities are synthetic. They "exist" only if at least one triple
3987    // references them as subject or object. Zero triples -> 404 per brief.
3988    let entity_q = entity_value.clone();
3989    let rows: Vec<TripleRow> = tenant
3990        .read()
3991        .interact(move |conn| {
3992            let mut stmt = conn.prepare(
3993                "SELECT subject_id, predicate, object_id, confidence
3994                   FROM triples
3995                  WHERE (subject_id = ?1 OR object_id = ?1)
3996                    AND status = 'active'
3997                  ORDER BY valid_from_ms DESC
3998                  LIMIT ?2",
3999            )?;
4000            stmt.query_map(
4001                rusqlite::params![&entity_q, GRAPH_INSPECT_ENTITY_TRIPLES_CAP],
4002                |r| {
4003                    Ok(TripleRow {
4004                        subject_id: r.get(0)?,
4005                        predicate: r.get(1)?,
4006                        object_id: r.get(2)?,
4007                        confidence: r.get(3)?,
4008                    })
4009                },
4010            )?
4011            .collect::<rusqlite::Result<Vec<_>>>()
4012        })
4013        .await
4014        .map_err(ApiError::from)?;
4015
4016    if rows.is_empty() {
4017        return Err(ApiError::not_found(format!(
4018            "node_id {node_id_full:?} (entity {entity_value:?}) not found in current tenant -- entities must be referenced by at least one triple to be inspectable"
4019        )));
4020    }
4021
4022    // Triples flow out FROM the entity to its counterpart. For each row
4023    // determine which side the entity appears on and emit ent:<self> ->
4024    // ent:<other>. Brief calls these triples_out (entities don't have
4025    // structural triples_in in v0.10.0 P1).
4026    let mut triples_out = Vec::with_capacity(rows.len());
4027    for t in rows {
4028        let other = if t.subject_id == entity_value {
4029            t.object_id
4030        } else {
4031            // entity_value matched on object_id; counterpart is subject.
4032            t.subject_id
4033        };
4034        let tgt_id = format!("ent:{other}");
4035        triples_out.push(GraphEdge {
4036            id: edge_id(&node_id_full, "triple", &tgt_id),
4037            source: node_id_full.clone(),
4038            target: tgt_id,
4039            kind: "triple",
4040            predicate: Some(t.predicate),
4041            weight: Some(t.confidence),
4042        });
4043    }
4044
4045    Ok(GraphInspectResponse {
4046        node: graph_node_for_entity(tenant_id, &entity_value),
4047        full_text: None,
4048        triples_in: Vec::new(),
4049        triples_out,
4050    })
4051}
4052
4053// ---------------------------------------------------------------------------
4054// Graph neighbors -- unified explicit + HNSW-semantic (v0.10.0)
4055//
4056// `GET /v1/graph/neighbors/{id}` powers solo-web's "show similar" overlay.
4057// Returns the same `GraphResponse { nodes, edges }` envelope as the rest of
4058// the family, combining:
4059//
4060//   * Explicit edges (triples / document_chunk / cluster_member) incident
4061//     to the focal node -- the same shape `/v1/graph/expand` produces for
4062//     a given (node_id, edge_kind) pair, but UNIONed across every edge kind
4063//     compatible with the focal node's kind.
4064//
4065//   * HNSW-semantic edges (cosine-similarity neighbors) -- only valid for
4066//     `ep:` (episodes) and `chunk:` (chunks); other source kinds return
4067//     400 when `kind=semantic` is requested alone, or are silently skipped
4068//     when `kind=both` is requested (explicit-only path still runs).
4069//
4070// Why this isn't just expand-with-a-flag: `/v1/graph/expand` takes a
4071// specific `kind=<edge-kind>` parameter and expands along ONE edge kind at
// a time. `/v1/graph/neighbors/{id}` UNIFIES all compatible edge kinds
4073// incident to the focal node into one response. Different UX (drill vs.
4074// overview); different API; both needed.
4075//
4076// ## Refactor decision
4077//
4078// The brief recommends extracting `expand`'s per-kind helpers into a
4079// shared module. In practice the `expand_*` async fns already do exactly
4080// what neighbors needs for the explicit path (same response shape, same
4081// tenant + auth + existence semantics). To keep the change surgical and
4082// to preserve `expand`'s existing tests byte-for-byte, neighbors **reuses
4083// the existing `expand_*` async fns directly** rather than refactoring
4084// their bodies. The explicit path is a thin orchestrator that calls every
4085// `expand_*` fn compatible with the focal node's kind and concatenates
4086// the results.
4087//
4088// ## Dedup rule (kind=both)
4089//
4090// When an edge with the same (source, target) appears in BOTH the
4091// explicit and the semantic result sets, the explicit edge wins -- the
4092// semantic edge is dropped. We dedupe by `(source, target)` (NOT by full
4093// edge id, which encodes the kind too): the rule "explicit beats
4094// semantic" only makes sense when both endpoints agree, regardless of
4095// kind. In practice this is most likely to fire when an entity-focused
4096// expand (which surfaces episodes as triple-targets) collides with a
4097// semantic search hit on the same episode pair.
4098//
4099// ## Limit policy
4100//
4101// `limit` is applied PER KIND, not total. With `limit=25` and
4102// `kind=both`, the response carries up to 25 explicit + 25 semantic
4103// edges (minus dedupe). Silent clamp at 100 (matches the rest of the
4104// `/v1/graph/*` family).
4105//
4106// ## Threshold filter
4107//
4108// `threshold` (default 0.75) filters semantic neighbors by
4109// `weight >= threshold`, where `weight = (1 - cos_distance).max(0)`. The
4110// default is conservative -- below 0.75 the renderer typically shows too
4111// many spurious edges for a useful "show similar" overlay. Callers can
4112// dial down (e.g. `?threshold=0.5`) for a broader view.
4113//
4114// See `docs/dev-log/0116-graph-neighbors-impl.md` for the design notes.
4115// ---------------------------------------------------------------------------
4116
/// Page size used when the request carries no `?limit=`. Kept small so the
/// "show similar" overlay isn't visually overwhelming on first click.
const GRAPH_NEIGHBORS_DEFAULT_LIMIT: u32 = 25;
/// Ceiling that larger `limit` values are silently clamped to. Same value
/// as the rest of the `/v1/graph/*` family.
const GRAPH_NEIGHBORS_MAX_LIMIT: u32 = 100;
/// Similarity floor used when the request carries no `?threshold=`.
/// Semantic edges with `weight < threshold` are dropped from the result.
const GRAPH_NEIGHBORS_DEFAULT_THRESHOLD: f32 = 0.75;
4125
4126/// Discriminator for which neighbor kinds the caller wants. Default is
4127/// `both` (explicit edges + HNSW-semantic).
4128#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
4129#[serde(rename_all = "snake_case")]
4130enum GraphNeighborsKind {
4131    Explicit,
4132    Semantic,
4133    #[default]
4134    Both,
4135}
4136
4137#[derive(Debug, Deserialize)]
4138struct GraphNeighborsQuery {
4139    #[serde(default)]
4140    kind: Option<GraphNeighborsKind>,
4141    #[serde(default)]
4142    threshold: Option<f32>,
4143    #[serde(default)]
4144    limit: Option<u32>,
4145}
4146
4147/// `GET /v1/graph/neighbors/{id}`. See module-level comments.
4148async fn graph_neighbors_handler(
4149    TenantExtractor(tenant): TenantExtractor,
4150    Path(id): Path<String>,
4151    Query(q): Query<GraphNeighborsQuery>,
4152) -> Result<Json<GraphExpandResponse>, ApiError> {
4153    let kind = q.kind.unwrap_or_default();
4154    let threshold = q.threshold.unwrap_or(GRAPH_NEIGHBORS_DEFAULT_THRESHOLD);
4155    if !(0.0..=1.0).contains(&threshold) {
4156        return Err(ApiError::bad_request(format!(
4157            "threshold must be in [0.0, 1.0]; got {threshold}"
4158        )));
4159    }
4160    // Silent clamp at GRAPH_NEIGHBORS_MAX_LIMIT -- matches expand /
4161    // nodes / edges convention. Test `neighbors_limit_clamped_at_100`
4162    // locks in the clamp policy.
4163    let limit_raw = q.limit.unwrap_or(GRAPH_NEIGHBORS_DEFAULT_LIMIT);
4164    let limit = limit_raw.clamp(1, GRAPH_NEIGHBORS_MAX_LIMIT);
4165
4166    let (node_kind, value) = parse_node_id(&id)?;
4167    let value_owned = value.to_string();
4168    let tenant_id_str = tenant.tenant_id().to_string();
4169    let node_id_full = id;
4170
4171    // Existence probe for the focal node. The explicit + semantic paths
4172    // each handle "node-found-but-zero-neighbors" gracefully (200 with
4173    // empty arrays) -- but we want a true 404 when the id resolves to no
4174    // row at all, regardless of which kind the caller asked for. This
4175    // matches the inspect endpoint's gate: a node has to exist to be
4176    // meaningfully "neighborable".
4177    ensure_neighbors_focal_exists(&tenant, node_kind, &value_owned, &node_id_full).await?;
4178
4179    // Dispatch.
4180    let (explicit_nodes, explicit_edges) = if matches!(
4181        kind,
4182        GraphNeighborsKind::Explicit | GraphNeighborsKind::Both
4183    ) {
4184        neighbors_explicit(
4185            &tenant,
4186            &tenant_id_str,
4187            node_kind,
4188            &value_owned,
4189            &node_id_full,
4190            limit as i64,
4191        )
4192        .await?
4193    } else {
4194        (Vec::new(), Vec::new())
4195    };
4196
4197    let (semantic_nodes, semantic_edges) = if matches!(
4198        kind,
4199        GraphNeighborsKind::Semantic | GraphNeighborsKind::Both
4200    ) {
4201        match neighbors_semantic(
4202            &tenant,
4203            &tenant_id_str,
4204            node_kind,
4205            &value_owned,
4206            &node_id_full,
4207            limit,
4208            threshold,
4209        )
4210        .await
4211        {
4212            Ok(parts) => parts,
4213            Err(e) => {
4214                // `kind=semantic` alone against an unsupported focal node
4215                // (doc/cl/ent) is a hard 400 -- the caller asked for ONLY
4216                // semantic neighbors and there are none possible.
4217                //
4218                // `kind=both` against an unsupported focal node silently
4219                // skips the semantic step; the explicit path still
4220                // delivers a meaningful answer. This mirrors the
4221                // pragmatic UX: clicking "show similar" on an entity
4222                // still surfaces the entity's triples without surfacing a
4223                // pointless error.
4224                if matches!(kind, GraphNeighborsKind::Semantic) {
4225                    return Err(e);
4226                }
4227                (Vec::new(), Vec::new())
4228            }
4229        }
4230    } else {
4231        (Vec::new(), Vec::new())
4232    };
4233
4234    // Merge + dedupe. Explicit edges win over semantic edges with the
4235    // same (source, target). Nodes dedupe by id.
4236    let mut explicit_endpoints: std::collections::HashSet<(String, String)> =
4237        std::collections::HashSet::with_capacity(explicit_edges.len());
4238    for e in &explicit_edges {
4239        explicit_endpoints.insert((e.source.clone(), e.target.clone()));
4240    }
4241
4242    let mut nodes: Vec<GraphNode> = Vec::with_capacity(explicit_nodes.len() + semantic_nodes.len());
4243    let mut edges: Vec<GraphEdge> =
4244        Vec::with_capacity(explicit_edges.len() + semantic_edges.len());
4245    let mut seen_node_ids: std::collections::HashSet<String> =
4246        std::collections::HashSet::with_capacity(explicit_nodes.len() + semantic_nodes.len());
4247
4248    for n in explicit_nodes {
4249        if seen_node_ids.insert(n.id.clone()) {
4250            nodes.push(n);
4251        }
4252    }
4253    for e in explicit_edges {
4254        edges.push(e);
4255    }
4256    for n in semantic_nodes {
4257        if seen_node_ids.insert(n.id.clone()) {
4258            nodes.push(n);
4259        }
4260    }
4261    for e in semantic_edges {
4262        if explicit_endpoints.contains(&(e.source.clone(), e.target.clone())) {
4263            // Explicit edge already covers this pair -- drop the semantic
4264            // duplicate per the dedup rule. The semantic node may still
4265            // remain in `nodes` if no other edge already pulled it in;
4266            // that's fine -- the renderer renders nodes with weight-less
4267            // structural edges either way.
4268            continue;
4269        }
4270        edges.push(e);
4271    }
4272
4273    Ok(Json(GraphExpandResponse { nodes, edges }))
4274}
4275
4276/// Existence probe for the focal node. Translates the prefixed id into a
4277/// per-kind COUNT query against the matching table. Returns 404 (not 200
4278/// with empty arrays) when the node doesn't exist in the tenant's DB.
4279/// For entities the "existence" check is "is this entity referenced by
4280/// at least one triple" -- consistent with the inspect-entity contract
4281/// from `0115`.
4282async fn ensure_neighbors_focal_exists(
4283    tenant: &TenantHandle,
4284    node_kind: NodeKind,
4285    value: &str,
4286    node_id_full: &str,
4287) -> Result<(), ApiError> {
4288    match node_kind {
4289        NodeKind::Episode => ensure_episode_exists(tenant, value, node_id_full).await,
4290        NodeKind::Cluster => ensure_cluster_exists(tenant, value, node_id_full).await,
4291        NodeKind::Document => ensure_document_exists(tenant, value, node_id_full).await,
4292        NodeKind::Chunk => ensure_chunk_exists(tenant, value, node_id_full).await,
4293        NodeKind::Entity => ensure_entity_referenced(tenant, value, node_id_full).await,
4294    }
4295}
4296
4297/// 404 if the chunk_id has no row in this tenant's `document_chunks`
4298/// table whose parent doc is active. Mirrors `ensure_*_exists` from
4299/// `expand`.
4300async fn ensure_chunk_exists(
4301    tenant: &TenantHandle,
4302    chunk_id: &str,
4303    node_id_full: &str,
4304) -> Result<(), ApiError> {
4305    let chunk_id_q = chunk_id.to_string();
4306    let exists: i64 = tenant
4307        .read()
4308        .interact(move |conn| {
4309            conn.query_row(
4310                "SELECT COUNT(*)
4311                   FROM document_chunks c
4312                   JOIN documents d ON d.doc_id = c.doc_id
4313                  WHERE c.chunk_id = ?1
4314                    AND d.status = 'active'",
4315                rusqlite::params![&chunk_id_q],
4316                |r| r.get(0),
4317            )
4318        })
4319        .await
4320        .map_err(ApiError::from)?;
4321    if exists == 0 {
4322        return Err(ApiError::not_found(format!(
4323            "node_id {node_id_full:?} not found in current tenant"
4324        )));
4325    }
4326    Ok(())
4327}
4328
4329/// 404 if the entity isn't referenced by at least one active triple in
4330/// the tenant. Matches the inspect-entity 404 contract: entities are
4331/// synthetic, "existence" is "shows up in at least one triple".
4332async fn ensure_entity_referenced(
4333    tenant: &TenantHandle,
4334    entity_value: &str,
4335    node_id_full: &str,
4336) -> Result<(), ApiError> {
4337    let entity_q = entity_value.to_string();
4338    let exists: i64 = tenant
4339        .read()
4340        .interact(move |conn| {
4341            conn.query_row(
4342                "SELECT COUNT(*)
4343                   FROM triples
4344                  WHERE (subject_id = ?1 OR object_id = ?1)
4345                    AND status = 'active'",
4346                rusqlite::params![&entity_q],
4347                |r| r.get(0),
4348            )
4349        })
4350        .await
4351        .map_err(ApiError::from)?;
4352    if exists == 0 {
4353        return Err(ApiError::not_found(format!(
4354            "node_id {node_id_full:?} (entity {entity_value:?}) not found in current tenant -- entities must be referenced by at least one triple to be neighborable"
4355        )));
4356    }
4357    Ok(())
4358}
4359
4360/// Explicit-neighbor path. Dispatches per focal node kind, calling the
4361/// existing `expand_*` async fns for each compatible edge kind and
4362/// concatenating the results. This is the "reuse" refactor decision:
4363/// no duplication of expand's SQL, and expand's tests stay byte-for-byte
4364/// intact because we don't touch its bodies.
4365async fn neighbors_explicit(
4366    tenant: &TenantHandle,
4367    tenant_id: &str,
4368    node_kind: NodeKind,
4369    value: &str,
4370    node_id_full: &str,
4371    limit: i64,
4372) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
4373    let mut nodes: Vec<GraphNode> = Vec::new();
4374    let mut edges: Vec<GraphEdge> = Vec::new();
4375
4376    match node_kind {
4377        NodeKind::Episode => {
4378            // Episodes have two compatible explicit-edge kinds:
4379            //   * cluster_member (episode -> clusters)
4380            //   * triple (episode -> entities, plus subj/obj entity pairs)
4381            //
4382            // document_chunk doesn't apply (episodes aren't documents).
4383            // Run each path, concat. Per-kind limit -- the caller asked for
4384            // up to `limit` neighbors PER KIND.
4385            let r1 = expand_cluster_member(tenant, tenant_id, node_kind, value, node_id_full, limit)
4386                .await?;
4387            nodes.extend(r1.nodes);
4388            edges.extend(r1.edges);
4389            let r2 =
4390                expand_triple(tenant, tenant_id, node_kind, value, node_id_full, limit).await?;
4391            nodes.extend(r2.nodes);
4392            edges.extend(r2.edges);
4393        }
4394        NodeKind::Document => {
4395            // Documents have one compatible explicit-edge kind:
4396            // document_chunk (document -> chunks).
4397            let r = expand_document_chunk(tenant, tenant_id, node_kind, value, node_id_full, limit)
4398                .await?;
4399            nodes.extend(r.nodes);
4400            edges.extend(r.edges);
4401        }
4402        NodeKind::Chunk => {
4403            // Chunks have one compatible explicit-edge kind:
4404            // document_chunk (chunk -> parent document).
4405            let r = expand_document_chunk(tenant, tenant_id, node_kind, value, node_id_full, limit)
4406                .await?;
4407            nodes.extend(r.nodes);
4408            edges.extend(r.edges);
4409        }
4410        NodeKind::Cluster => {
4411            // Clusters have one compatible explicit-edge kind:
4412            // cluster_member (cluster -> episodes).
4413            let r = expand_cluster_member(tenant, tenant_id, node_kind, value, node_id_full, limit)
4414                .await?;
4415            nodes.extend(r.nodes);
4416            edges.extend(r.edges);
4417        }
4418        NodeKind::Entity => {
4419            // Entities have one compatible explicit-edge kind:
4420            // triple (entity -> episodes where this entity is referenced).
4421            let r =
4422                expand_triple(tenant, tenant_id, node_kind, value, node_id_full, limit).await?;
4423            nodes.extend(r.nodes);
4424            edges.extend(r.edges);
4425        }
4426    }
4427    Ok((nodes, edges))
4428}
4429
4430/// Semantic-neighbor path. Only valid for episode + chunk focal nodes;
4431/// other kinds return 400. Reuses the existing inner pipelines:
4432///
4433///   * Episodes -> `solo_query::recall::run_recall_inner` (same path
4434///     `expand_semantic` uses; filters out chunk hits).
4435///   * Chunks   -> `solo_query::doc_search::run_doc_search_inner` (the
4436///     equivalent chunk-restricted vector pipeline).
4437///
4438/// Re-embed the focal node's content for the HNSW query rather than
4439/// loading the persisted vector from `embeddings` -- the same trade-off
4440/// `expand_semantic` made: cheaper code path overall, with deterministic
4441/// embedders in tests + batch-sized embedders in prod making the recompute
4442/// cost negligible.
4443async fn neighbors_semantic(
4444    tenant: &TenantHandle,
4445    tenant_id: &str,
4446    node_kind: NodeKind,
4447    value: &str,
4448    node_id_full: &str,
4449    limit: u32,
4450    threshold: f32,
4451) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
4452    match node_kind {
4453        NodeKind::Episode => {
4454            neighbors_semantic_from_episode(
4455                tenant,
4456                tenant_id,
4457                value,
4458                node_id_full,
4459                limit,
4460                threshold,
4461            )
4462            .await
4463        }
4464        NodeKind::Chunk => {
4465            neighbors_semantic_from_chunk(
4466                tenant,
4467                tenant_id,
4468                value,
4469                node_id_full,
4470                limit,
4471                threshold,
4472            )
4473            .await
4474        }
4475        _ => Err(ApiError::bad_request(format!(
4476            "semantic neighbors only valid for episode or chunk source; got {}",
4477            node_kind.as_wire_str()
4478        ))),
4479    }
4480}
4481
4482async fn neighbors_semantic_from_episode(
4483    tenant: &TenantHandle,
4484    tenant_id: &str,
4485    memory_id: &str,
4486    node_id_full: &str,
4487    limit: u32,
4488    threshold: f32,
4489) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
4490    let memory_id_q = memory_id.to_string();
4491    let memory_id_for_self_excl = memory_id.to_string();
4492    let content: Option<String> = tenant
4493        .read()
4494        .interact(move |conn| {
4495            conn.query_row(
4496                "SELECT content FROM episodes WHERE memory_id = ?1 AND status = 'active'",
4497                rusqlite::params![&memory_id_q],
4498                |r| r.get::<_, String>(0),
4499            )
4500            .map(Some)
4501            .or_else(|e| match e {
4502                rusqlite::Error::QueryReturnedNoRows => Ok(None),
4503                other => Err(other),
4504            })
4505        })
4506        .await
4507        .map_err(ApiError::from)?;
4508
4509    // Existence is guaranteed by the focal-exists probe earlier; an
4510    // empty content here would be a status-transition race we treat as
4511    // "nothing to compare against".
4512    let Some(content) = content else {
4513        return Ok((Vec::new(), Vec::new()));
4514    };
4515
4516    // Widen the request by 1 so dropping self doesn't shrink the page.
4517    let widened = (limit as usize).saturating_add(1).min(100);
4518    let result = solo_query::recall::run_recall_inner(
4519        tenant.embedder(),
4520        tenant.hnsw(),
4521        tenant.read(),
4522        &content,
4523        widened,
4524    )
4525    .await
4526    .map_err(ApiError::from)?;
4527
4528    let mut nodes = Vec::new();
4529    let mut edges = Vec::new();
4530    for hit in result.hits.into_iter() {
4531        if hit.memory_id == memory_id_for_self_excl {
4532            // Skip self.
4533            continue;
4534        }
4535        if nodes.len() as u32 >= limit {
4536            break;
4537        }
4538        let weight = (1.0 - hit.cos_distance).max(0.0);
4539        if weight < threshold {
4540            continue;
4541        }
4542        let target_id = format!("ep:{}", hit.memory_id);
4543        edges.push(GraphEdge {
4544            id: edge_id(node_id_full, "semantic", &target_id),
4545            source: node_id_full.to_string(),
4546            target: target_id,
4547            kind: "semantic",
4548            predicate: None,
4549            weight: Some(weight),
4550        });
4551        nodes.push(GraphNode {
4552            id: format!("ep:{}", hit.memory_id),
4553            kind: NodeKind::Episode.as_wire_str(),
4554            label: episode_label(&hit.content),
4555            ts_ms: None,
4556            tenant_id: tenant_id.to_string(),
4557            preview: Some(truncate_preview(&hit.content, GRAPH_PREVIEW_CHARS)),
4558        });
4559    }
4560    Ok((nodes, edges))
4561}
4562
4563async fn neighbors_semantic_from_chunk(
4564    tenant: &TenantHandle,
4565    tenant_id: &str,
4566    chunk_id: &str,
4567    node_id_full: &str,
4568    limit: u32,
4569    threshold: f32,
4570) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
4571    let chunk_id_q = chunk_id.to_string();
4572    let chunk_id_for_self_excl = chunk_id.to_string();
4573    let content: Option<String> = tenant
4574        .read()
4575        .interact(move |conn| {
4576            conn.query_row(
4577                "SELECT c.content
4578                   FROM document_chunks c
4579                   JOIN documents d ON d.doc_id = c.doc_id
4580                  WHERE c.chunk_id = ?1
4581                    AND d.status = 'active'",
4582                rusqlite::params![&chunk_id_q],
4583                |r| r.get::<_, String>(0),
4584            )
4585            .map(Some)
4586            .or_else(|e| match e {
4587                rusqlite::Error::QueryReturnedNoRows => Ok(None),
4588                other => Err(other),
4589            })
4590        })
4591        .await
4592        .map_err(ApiError::from)?;
4593
4594    let Some(content) = content else {
4595        return Ok((Vec::new(), Vec::new()));
4596    };
4597
4598    let widened = (limit as usize).saturating_add(1).min(100);
4599    let hits = solo_query::doc_search::run_doc_search_inner(
4600        tenant.embedder(),
4601        tenant.hnsw(),
4602        tenant.read(),
4603        &content,
4604        widened,
4605    )
4606    .await
4607    .map_err(ApiError::from)?;
4608
4609    let mut nodes = Vec::new();
4610    let mut edges = Vec::new();
4611    for hit in hits.into_iter() {
4612        if hit.chunk_id == chunk_id_for_self_excl {
4613            continue;
4614        }
4615        if nodes.len() as u32 >= limit {
4616            break;
4617        }
4618        let weight = (1.0 - hit.cos_distance).max(0.0);
4619        if weight < threshold {
4620            continue;
4621        }
4622        let target_id = format!("chunk:{}", hit.chunk_id);
4623        edges.push(GraphEdge {
4624            id: edge_id(node_id_full, "semantic", &target_id),
4625            source: node_id_full.to_string(),
4626            target: target_id,
4627            kind: "semantic",
4628            predicate: None,
4629            weight: Some(weight),
4630        });
4631        let exp = ExpandedChunk {
4632            chunk_id: hit.chunk_id.clone(),
4633            chunk_index: hit.chunk_index as i64,
4634            content: hit.content.clone(),
4635        };
4636        nodes.push(graph_node_for_chunk(tenant_id, &exp));
4637    }
4638    Ok((nodes, edges))
4639}
4640
4641// ---------------------------------------------------------------------------
4642// /v1/graph/stream — SSE invalidation feed (v0.10.0)
4643//
4644// Powers solo-web's live-update behaviour: instead of polling, the
4645// frontend subscribes once and refetches its pages only when the
4646// writer-actor signals "your tenant's data changed". Per scoping doc
4647// §3 Decision C, the wire format is invalidation-shaped (not row
4648// payload) — the SSE channel says "refetch the affected page" rather
4649// than streaming actual rows.
4650//
4651// Wire format:
4652//
4653//   ```
4654//   event: init
4655//   data: {"connected": true, "tenant_id": "default", "ts_ms": 1715625600000}
4656//
4657//   event: invalidate
4658//   data: {"reason": "memory.remember", "tenant_id": "default",
4659//          "ts_ms": 1715625610000, "kind": "episode"}
4660//
4661//   event: heartbeat
4662//   data: {"ts_ms": 1715625640000}
4663//   ```
4664//
4665// Heartbeat: every [`STREAM_HEARTBEAT_SECS`] seconds, regardless of
4666// whether real events fired (simpler than resetting the timer on every
4667// invalidate; the cost is a few extra bytes per minute on idle).
4668//
// Lagged subscribers (the subscriber fell more than 256 writes behind)
// produce one `tracing::warn!` line per lag notification and nothing on
// the wire; they resync via the next real `invalidate`. Invalidation
// events are idempotent, so the missed batch reduces to a single
// refetch on the client side. No correctness loss.
4673//
4674// See `docs/dev-log/0117-graph-stream-impl.md` for the full design.
4675// ---------------------------------------------------------------------------
4676
/// Heartbeat interval, in seconds, for `/v1/graph/stream`. Fires
/// unconditionally every 30 seconds — easier to reason about than
/// "fire 30s after the last event", and keeps proxies happy without
/// code that races a reset on every invalidate.
/// `graph_stream_handler` passes this value to
/// `build_invalidate_stream` as the heartbeat period.
pub const STREAM_HEARTBEAT_SECS: u64 = 30;

/// SSE `event:` name emitted once on connection open. The client uses
/// it to confirm the subscription is live.
const STREAM_EVENT_INIT: &str = "init";

/// SSE `event:` name emitted for every `InvalidateEvent` received from
/// the tenant's broadcast channel, i.e. on every writer-actor commit
/// (and on `gdpr.forget_user`'s non-writer-actor cascade).
const STREAM_EVENT_INVALIDATE: &str = "invalidate";

/// SSE `event:` name emitted each time the heartbeat interval ticks.
const STREAM_EVENT_HEARTBEAT: &str = "heartbeat";
4693
4694/// `GET /v1/graph/stream` — Server-Sent Events feed of
4695/// `InvalidateEvent`s scoped to the request's tenant.
4696///
4697/// Subscribes to the per-tenant `broadcast::Sender<InvalidateEvent>`
4698/// held by `TenantHandle` (populated by `TenantHandle::open`). The
4699/// stream:
4700///
4701///   1. Emits one `event: init` line at connection open.
4702///   2. Selects between (broadcast recv) and (heartbeat tick) in a
4703///      loop, emitting `invalidate` / `heartbeat` events as either
4704///      fires.
4705///   3. Exits when the client closes the connection (axum drops the
4706///      response future) OR the broadcast Sender is dropped (tenant
4707///      shutdown).
4708///
4709/// Auth + tenant resolution mirror the rest of `/v1/graph/*`: the
4710/// `auth_middleware` returns 401 on missing bearer; the
4711/// `TenantExtractor` resolves the per-tenant DB. The handler itself
4712/// has no per-route auth logic.
4713async fn graph_stream_handler(
4714    TenantExtractor(tenant): TenantExtractor,
4715) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
4716    // Subscribe BEFORE building the init event so a writer-actor
4717    // commit that lands in the (microscopic) window between init and
4718    // the first poll is still observed. `broadcast::Receiver` buffers
4719    // up to the channel's capacity from the moment of subscribe.
4720    let rx = tenant.invalidate_sender().subscribe();
4721    let tenant_id = tenant.tenant_id().to_string();
4722    let stream = build_invalidate_stream(rx, tenant_id, STREAM_HEARTBEAT_SECS);
4723    // axum's keep-alive layer adds its own `:` comment line every
4724    // configured interval; we keep that OFF and ship our own typed
4725    // `heartbeat` event instead. The client distinguishes the two by
4726    // looking at the SSE `event:` field — typed heartbeats let solo-web
4727    // surface "connection healthy" in its UI without parsing comment
4728    // lines.
4729    Sse::new(stream).keep_alive(KeepAlive::new().interval(Duration::from_secs(3600)))
4730}
4731
/// Per-subscriber state threaded through `futures::stream::unfold` by
/// `build_invalidate_stream`. Carries the receiver + heartbeat
/// interval + a one-shot flag for the initial `init` event. The state
/// is moved into each step of the stream and handed back alongside
/// the yielded event.
struct StreamState {
    /// Receiving half of the tenant's invalidation broadcast channel.
    rx: broadcast::Receiver<InvalidateEvent>,
    /// Timer whose ticks produce `heartbeat` events.
    heartbeat: tokio::time::Interval,
    /// Tenant id echoed in the `init` event payload.
    tenant_id: String,
    /// `true` until the first poll completes — used to gate the `init`
    /// event. Flipped to `false` after the init event yields.
    needs_init: bool,
}
4743
4744/// Build the stream of SSE [`Event`]s for one subscriber.
4745///
4746/// First yield is the `init` event. After that, the stream selects
4747/// between the broadcast receiver and a tokio interval timer that
4748/// fires every `heartbeat_secs` seconds. Lagged broadcast errors are
4749/// swallowed with a single `tracing::warn!` line — the client resyncs
4750/// on the next real invalidate (invalidation events are idempotent).
4751fn build_invalidate_stream(
4752    rx: broadcast::Receiver<InvalidateEvent>,
4753    tenant_id: String,
4754    heartbeat_secs: u64,
4755) -> impl Stream<Item = Result<Event, Infallible>> {
4756    // `tokio::time::interval_at(start, period)` starts ticking at
4757    // `start`; we set `start = now + period` so the first heartbeat
4758    // lands `heartbeat_secs` AFTER the init event. Without `interval_at`
4759    // the default `interval()` would fire immediately at t=0, racing
4760    // the init event.
4761    let start_at = tokio::time::Instant::now() + Duration::from_secs(heartbeat_secs);
4762    let heartbeat =
4763        tokio::time::interval_at(start_at, Duration::from_secs(heartbeat_secs));
4764
4765    let state = StreamState {
4766        rx,
4767        heartbeat,
4768        tenant_id,
4769        needs_init: true,
4770    };
4771    futures::stream::unfold(state, move |mut state| async move {
4772        // First-poll: yield the init event without touching the
4773        // receiver or the heartbeat. Subsequent polls fall through to
4774        // the select loop.
4775        if state.needs_init {
4776            state.needs_init = false;
4777            let init_payload = serde_json::json!({
4778                "connected": true,
4779                "tenant_id": state.tenant_id,
4780                "ts_ms": chrono::Utc::now().timestamp_millis(),
4781            });
4782            let ev = Event::default()
4783                .event(STREAM_EVENT_INIT)
4784                .json_data(init_payload)
4785                .unwrap_or_else(|_| Event::default().event(STREAM_EVENT_INIT));
4786            return Some((Ok::<Event, Infallible>(ev), state));
4787        }
4788        loop {
4789            tokio::select! {
4790                event = state.rx.recv() => {
4791                    match event {
4792                        Ok(ev) => {
4793                            let sse_event = Event::default()
4794                                .event(STREAM_EVENT_INVALIDATE)
4795                                .json_data(&ev)
4796                                .unwrap_or_else(|_| Event::default()
4797                                    .event(STREAM_EVENT_INVALIDATE));
4798                            return Some((Ok::<Event, Infallible>(sse_event), state));
4799                        }
4800                        Err(broadcast::error::RecvError::Lagged(n)) => {
4801                            tracing::warn!(
4802                                lagged = n,
4803                                "graph stream subscriber lagged; client will \
4804                                 resync on the next real invalidate"
4805                            );
4806                            // Continue receiving — do NOT yield anything
4807                            // for a lag.
4808                        }
4809                        Err(broadcast::error::RecvError::Closed) => {
4810                            tracing::debug!(
4811                                "graph stream broadcast closed; ending SSE stream"
4812                            );
4813                            return None;
4814                        }
4815                    }
4816                }
4817                _ = state.heartbeat.tick() => {
4818                    let hb_payload = serde_json::json!({
4819                        "ts_ms": chrono::Utc::now().timestamp_millis(),
4820                    });
4821                    let sse_event = Event::default()
4822                        .event(STREAM_EVENT_HEARTBEAT)
4823                        .json_data(hb_payload)
4824                        .unwrap_or_else(|_| Event::default()
4825                            .event(STREAM_EVENT_HEARTBEAT));
4826                    return Some((Ok::<Event, Infallible>(sse_event), state));
4827                }
4828            }
4829        }
4830    })
4831}
4832
4833// ---------------------------------------------------------------------------
4834// /v1/tenants — principal-scoped tenant list (v0.10.0 + v0.10.1 hydration)
4835//
4836// Powers solo-web's top-bar tenant picker (Decision F in
4837// `docs/dev-log/0105-solo-web-scoping.md` §3, route shape locked in §4
4838// Route 6). The endpoint is **read-only**; admin CRUD (create / delete /
4839// rename / quota change) remains CLI-only per ADR-0004 §"Admin operations".
4840// That keeps the privileged tenant-mutation surface off HTTP entirely
4841// while still letting an authenticated browser session enumerate the
4842// tenants it's allowed to see.
4843//
4844// Wire shape (200 OK):
4845//
4846//   ```json
4847//   {
4848//     "tenants": [
4849//       {
4850//         "id": "default",
4851//         "display_name": "Default tenant",
4852//         "created_at_ms": 1715625600000,
4853//         "last_accessed_ms": 1715625900000,
4854//         "status": "active",
4855//         "quota_bytes": null,
4856//         "episode_count": null,
4857//         "size_bytes": null,
4858//         "pct_used": null
4859//       }
4860//     ]
4861//   }
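//   ```
//
// Note: unlike the cost fields, `display_name`, `last_accessed_ms` and
// `quota_bytes` are omitted from a row entirely when unset (rather than
// emitted as `null`, as the sample above shows for `quota_bytes`) — see
// the `skip_serializing_if` attributes on `TenantListItem`.
//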
4864// The numeric `episode_count` / `size_bytes` / `pct_used` fields were
4865// **always `null` in v0.10.0** (cost-deferred). v0.10.1 hydrates them
4866// for real via `TenantRegistry::hydrate_tenant_cost_numbers`:
4867//
4868//   * `size_bytes` — `std::fs::metadata(<data_dir>/tenants/<db>.db).len()`.
4869//     Cheap; runs for every visible tenant.
4870//   * `episode_count` — `SELECT COUNT(*) FROM episodes WHERE
4871//     status='active'` against the per-tenant SQLCipher DB.
4872//   * `pct_used` — `size_bytes * 100 / quota_bytes` (f64, capped at
4873//     100.0) when both are known; `null` if `quota_bytes` is unset.
4874//
4875// **Cap**: opening + counting N tenant DBs is N×~10ms; the first-paint
4876// budget is tight, so we cap `episode_count` hydration at
4877// `TENANTS_COUNT_HYDRATION_CAP` (50) per request. Tenants beyond the
4878// cap get `episode_count: null` and the response carries an
4879// `X-Solo-Tenants-Count-Cap-Reached: true` header so clients can fetch
4880// counts for the tail tenants out-of-band if needed (mirroring the
4881// entity-cap pattern from `/v1/graph/nodes`). `size_bytes` is not
4882// capped — it's just a `metadata` call.
4883//
4884// The CLI's `solo tenants list` retains the canonical per-tenant
4885// cost-numbers path for operators who need exhaustive data.
4886//
4887// ## Visibility filter (load-bearing — three cases)
4888//
4889// The handler reads `AuthenticatedPrincipal` out of request extensions
4890// via `MaybePrincipal` and filters the registry list before
4891// serialisation:
4892//
4893//   1. **No principal** (`MaybePrincipal(None)`) — unauthenticated
4894//      loopback path, no `[auth]` block in `solo.config.toml`. Return
4895//      every `Active` tenant. Same scope as `solo tenants list` CLI.
4896//   2. **Bearer principal** (`subject == "bearer" && claims.is_null()`,
4897//      the `AuthenticatedPrincipal::bearer` signature emitted by
4898//      `BearerValidator::validate`). Single-principal daemon — the
4899//      bearer holder is the operator, so return every `Active`
4900//      tenant. Functionally equivalent to (1) from a leakage
4901//      standpoint.
4902//   3. **OIDC principal** (any other principal — `claims` carries the
4903//      JWT object). Filter to ONLY the tenant id matching
4904//      `principal.tenant_claim`. The configured OIDC tenant_claim is
4905//      already validated to a real `TenantId` by the auth middleware
4906//      (a `MissingTenantClaim` or `InvalidTenantClaim` shorts out at
4907//      403 BEFORE this handler runs). If the claim doesn't match any
4908//      registered tenant, return `{"tenants": []}` (200 OK, NOT 404)
4909//      — don't leak whether a tenant exists by 404'ing on names
4910//      outside the principal's scope.
4911//
4912// `PendingMigration` / `PendingDelete` tenants are **excluded** from the
4913// list in every case. solo-web's tenant picker should not surface a
4914// tenant that's mid-migration or queued for hard-delete — clicking
4915// such a row would race the admin tooling. The CLI's `solo tenants
4916// list` still shows them under an explicit `--include-pending` flag
4917// (out of scope here).
4918//
4919// See `docs/dev-log/0119-tenants-list-impl.md` for the full design.
4920// ---------------------------------------------------------------------------
4921
/// One row of the `/v1/tenants` response body. Shape mirrors
/// `solo_storage::TenantRecord` for the persisted fields plus the
/// cost-numbers triple (`episode_count`, `size_bytes`, `pct_used`).
/// That triple was always `null` in v0.10.0 and is hydrated from
/// v0.10.1 on (see the per-field notes below).
///
/// Serialisation: `display_name`, `last_accessed_ms` and `quota_bytes`
/// are omitted from the JSON when `None`; the three cost fields carry
/// no `skip_serializing_if`, so they are always present and serialise
/// as `null` when `None`.
#[derive(Debug, Clone, Serialize)]
struct TenantListItem {
    /// Tenant id (e.g. `"default"`, `"alice"`). Matches the
    /// `X-Solo-Tenant` header value clients send to other routes.
    id: String,
    /// Human-readable display name set at `solo tenants create`.
    /// `None` ⇒ omit from the JSON body.
    #[serde(skip_serializing_if = "Option::is_none")]
    display_name: Option<String>,
    /// Epoch ms when this tenant was registered.
    created_at_ms: i64,
    /// Epoch ms of the most recent `TenantRegistry::get_or_open` call
    /// (v0.9.0 P1). `None` for tenants that have never been opened
    /// since the migration ran; `None` ⇒ omit from the JSON body.
    #[serde(skip_serializing_if = "Option::is_none")]
    last_accessed_ms: Option<i64>,
    /// Lifecycle status. Always `"active"` in the v0.10.0 wire (we
    /// filter `PendingMigration` / `PendingDelete` out at list time).
    /// Surfaced for forward-compat — a future `?include_pending=1`
    /// query param could relax the filter without a shape change.
    status: TenantStatusJson,
    /// Per-tenant byte quota set via `solo tenants set-quota`. `None`
    /// ⇒ unlimited, and the key is omitted from the JSON body.
    #[serde(skip_serializing_if = "Option::is_none")]
    quota_bytes: Option<u64>,
    /// v0.10.1: count of `episodes WHERE status='active'`. Populated
    /// for the first `TENANTS_COUNT_HYDRATION_CAP` tenants in the
    /// response; `null` for tenants beyond the cap (in which case the
    /// response also carries `X-Solo-Tenants-Count-Cap-Reached: true`).
    /// Also `null` if the per-tenant DB file is missing or the COUNT
    /// failed.
    episode_count: Option<i64>,
    /// v0.10.1: size of the per-tenant SQLCipher DB on disk (bytes).
    /// `null` only if the file is missing or unreadable (corruption /
    /// permissions). Not affected by the cap — `std::fs::metadata` is
    /// cheap.
    size_bytes: Option<u64>,
    /// v0.10.1: `(size_bytes * 100.0 / quota_bytes)` capped at `100.0`
    /// when both `size_bytes` and `quota_bytes` are known. `null` if
    /// `quota_bytes` is unset (no quota = unlimited), if `quota_bytes`
    /// is `0`, or if `size_bytes` is unknown.
    pct_used: Option<f64>,
}
4969
/// JSON-side mirror of [`TenantStatus`], serialised in `snake_case`
/// (so `Active` becomes `"active"`).
///
/// `TenantStatus` already serialises itself, but it is deliberately
/// not reused here so that the HTTP wire shape stays decoupled from
/// the storage-side enum. Today both serialise identically; a future
/// status variant added to storage doesn't automatically leak onto
/// the wire.
#[derive(Debug, Clone, Copy, Serialize)]
#[serde(rename_all = "snake_case")]
enum TenantStatusJson {
    /// The only status the list endpoint exposes.
    Active,
}
4981
4982impl From<&solo_storage::TenantStatus> for TenantStatusJson {
4983    fn from(s: &solo_storage::TenantStatus) -> Self {
4984        // We only ever build this enum from `Active` records (the list
4985        // handler filters at source); the match exhausts so future
4986        // variants force a compile error here, not a wire mismatch.
4987        match s {
4988            solo_storage::TenantStatus::Active => TenantStatusJson::Active,
4989            // Defensive: should be filtered upstream. Map to Active to
4990            // avoid a panic, but the handler MUST keep filtering at
4991            // source. A clippy warning catches dead branches.
4992            solo_storage::TenantStatus::PendingMigration
4993            | solo_storage::TenantStatus::PendingDelete => TenantStatusJson::Active,
4994        }
4995    }
4996}
4997
/// Response body for `GET /v1/tenants`. Serialises as a JSON object
/// with a single `tenants` array.
#[derive(Debug, Serialize)]
struct TenantsListResponse {
    /// Tenants visible to the requesting principal, in the order the
    /// handler built them.
    tenants: Vec<TenantListItem>,
}
5003
/// v0.10.1: maximum number of tenants whose `episode_count` we hydrate
/// per `/v1/tenants` request. Opening + counting one tenant DB is
/// ~5-10ms; capping bounds the per-request wall time to keep
/// solo-web's first-paint budget tight. Tenants beyond the cap get
/// `episode_count: null` AND the response carries
/// `X-Solo-Tenants-Count-Cap-Reached: true` so clients can fetch
/// per-tenant counts out-of-band (CLI / future per-id endpoint) for
/// the tail. The 50 figure mirrors the entity-cap pattern from
/// `/v1/graph/nodes`.
const TENANTS_COUNT_HYDRATION_CAP: usize = 50;

/// v0.10.1: response header name set to `"true"` when the visible
/// tenant list is longer than `TENANTS_COUNT_HYDRATION_CAP`, i.e. the
/// per-request `episode_count` hydration cap was reached. Absent
/// otherwise. Grep-able by both server- and client-side code. Stored
/// lowercase because it is passed to
/// `axum::http::HeaderName::from_static` (header names are
/// case-insensitive on the wire; the canonical spelling is
/// `X-Solo-Tenants-Count-Cap-Reached`).
const X_SOLO_TENANTS_COUNT_CAP_HEADER: &str = "x-solo-tenants-count-cap-reached";
5022
5023/// `GET /v1/tenants` — list every tenant visible to the request's
5024/// principal. See module comment for the three-case visibility rule.
5025///
5026/// Errors:
5027///   * **401** — bearer required but missing/invalid (handled by
5028///     `auth_middleware` before this handler runs).
5029///   * **500** — `TenantsIndex` read failed. Surfaced via [`ApiError`].
5030///
5031/// No 404 path. If the OIDC principal's `tenant_claim` doesn't match
5032/// any registered tenant, the response is `200 OK` with `tenants:
5033/// []`. That keeps tenant existence out of side-channel range for an
5034/// OIDC user — they cannot probe for other tenants by id.
5035async fn tenants_list_handler(
5036    State(state): State<SoloHttpState>,
5037    MaybePrincipal(maybe_principal): MaybePrincipal,
5038) -> Result<Response, ApiError> {
5039    // Pull every registered tenant. `list_active` is the registry's
5040    // wrapper around `TenantsIndex::list`, which returns rows ordered
5041    // by `(created_at_ms ASC, tenant_id ASC)` — a stable order that
5042    // doesn't shift between requests, which solo-web relies on to keep
5043    // its tenant picker entries from reordering visually.
5044    let mut records = state.registry.list_active().await.map_err(ApiError::from)?;
5045
5046    // Filter at source: status MUST be Active (PendingMigration /
5047    // PendingDelete are admin-transient states that solo-web should
5048    // not surface). Matches the brief's
5049    // `tenants_status_filter_excludes_deleted` test.
5050    records.retain(|r| matches!(r.status, solo_storage::TenantStatus::Active));
5051
5052    // Apply the principal-driven visibility filter. The three cases
5053    // are exhaustive — see the module comment for the rationale on
5054    // each. `tenant_visibility_filter` is split out so the unit
5055    // tests can assert the rule independent of the SQL read.
5056    let filtered = filter_tenants_for_principal(records, maybe_principal.as_ref());
5057
5058    // v0.10.1: hydrate cost numbers (size_bytes, episode_count). The
5059    // registry helper handles missing DB files + the cap behavior. We
5060    // pass the cap so tenants beyond it return `None` for episode_count
5061    // — `size_bytes` is computed for everyone (cheap fs::metadata).
5062    let cap = TENANTS_COUNT_HYDRATION_CAP;
5063    let costs = state
5064        .registry
5065        .hydrate_tenant_cost_numbers(&filtered, cap)
5066        .await;
5067    let cap_reached = filtered.len() > cap;
5068
5069    let tenants: Vec<TenantListItem> = filtered
5070        .iter()
5071        .zip(costs.iter())
5072        .map(|(r, cost)| {
5073            let pct_used = match (cost.size_bytes, r.quota_bytes) {
5074                (Some(size), Some(quota)) if quota > 0 => {
5075                    let raw = (size as f64) * 100.0 / (quota as f64);
5076                    Some(raw.min(100.0))
5077                }
5078                _ => None,
5079            };
5080            TenantListItem {
5081                id: r.tenant_id.to_string(),
5082                display_name: r.display_name.clone(),
5083                created_at_ms: r.created_at_ms,
5084                last_accessed_ms: r.last_accessed_ms,
5085                status: TenantStatusJson::from(&r.status),
5086                quota_bytes: r.quota_bytes,
5087                episode_count: cost.episode_count,
5088                size_bytes: cost.size_bytes,
5089                pct_used,
5090            }
5091        })
5092        .collect();
5093
5094    let body = Json(TenantsListResponse { tenants });
5095    if cap_reached {
5096        let mut resp = body.into_response();
5097        resp.headers_mut().insert(
5098            axum::http::HeaderName::from_static(X_SOLO_TENANTS_COUNT_CAP_HEADER),
5099            axum::http::HeaderValue::from_static("true"),
5100        );
5101        Ok(resp)
5102    } else {
5103        Ok(body.into_response())
5104    }
5105}
5106
5107/// Pure function: apply the three-case principal-driven visibility
5108/// rule to a list of `TenantRecord`s. Extracted from the handler so
5109/// unit tests can exercise the rule without driving an axum router.
5110///
5111///   * `principal == None` ⇒ all records returned (no-auth path).
5112///   * Bearer-shaped principal (`subject == "bearer" && claims.is_null()`)
5113///     ⇒ all records returned (single-principal daemon).
5114///   * Any other principal (OIDC) ⇒ filter to records whose
5115///     `tenant_id == principal.tenant_claim`. An OIDC principal with
5116///     no `tenant_claim` (theoretically unreachable — the middleware
5117///     short-circuits at 403 before us, but we defend) returns an
5118///     empty list.
5119fn filter_tenants_for_principal(
5120    records: Vec<solo_storage::TenantRecord>,
5121    principal: Option<&AuthenticatedPrincipal>,
5122) -> Vec<solo_storage::TenantRecord> {
5123    let Some(p) = principal else {
5124        // Case 1: no auth configured — return all tenants. Same scope
5125        // as `solo tenants list`.
5126        return records;
5127    };
5128    if is_single_principal_bearer(p) {
5129        // Case 2: bearer principal — return all tenants. The single
5130        // bearer holder is functionally the daemon operator.
5131        return records;
5132    }
5133    // Case 3: OIDC principal — filter to the claimed tenant only. An
5134    // unmatched claim falls through to an empty list, NOT 404, to
5135    // avoid leaking tenant existence.
5136    let Some(claim) = p.tenant_claim.as_ref() else {
5137        return Vec::new();
5138    };
5139    records
5140        .into_iter()
5141        .filter(|r| r.tenant_id == *claim)
5142        .collect()
5143}
5144
5145/// True iff `principal` looks like a bearer-mode principal — the shape
5146/// emitted by [`AuthenticatedPrincipal::bearer`]: subject is literally
5147/// `"bearer"`, claims is `serde_json::Value::Null`, and scopes is
5148/// empty. OIDC principals carry a JWT object in `claims` and the JWT
5149/// `sub` in `subject`, so they fail this predicate.
5150///
5151/// Split out so the unit tests can assert the discriminator
5152/// independent of the rest of the handler. Keeping the predicate in
5153/// one place also makes future expansion easier — e.g., a v0.11
5154/// "admin scope" might add an OIDC variant that passes this gate by
5155/// looking for a `"solo:admin"` entry in `scopes`.
5156fn is_single_principal_bearer(principal: &AuthenticatedPrincipal) -> bool {
5157    principal.subject == "bearer"
5158        && principal.claims.is_null()
5159        && principal.scopes.is_empty()
5160}
5161
5162// ---------------------------------------------------------------------------
5163// v0.10.2 — MCP-over-HTTP transport on /mcp
5164// ---------------------------------------------------------------------------
5165
5166// v0.11.0 P2: the per-event names that used to live here as
5167// `MCP_STREAM_EVENT_INIT` moved into `crate::mcp_session` alongside the
5168// `McpEventKind` enum so the publisher (`SessionState::publish_event`)
5169// and the subscriber (`build_mcp_session_stream`) share one source of
5170// truth for the wire format. See `MCP_STREAM_EVENT_INIT_NAME`,
5171// `MCP_STREAM_EVENT_MESSAGE_NAME`, `MCP_STREAM_EVENT_PROGRESS_NAME`,
5172// `MCP_STREAM_EVENT_LAGGED_NAME`, and `MCP_STREAM_EVENT_HEARTBEAT_NAME`
5173// for the canonical strings.
5174
5175/// `POST /mcp` — JSON-RPC request/response.
5176///
5177/// v0.10.2 P2 entry point. Per the MCP Streamable HTTP transport spec,
5178/// the body is one JSON-RPC 2.0 envelope (`{jsonrpc, id, method,
5179/// params}`). The response is one JSON-RPC envelope (`{jsonrpc, id,
5180/// result}` or `{jsonrpc, id, error}`) with `Content-Type:
5181/// application/json`. **Status 200** for valid JSON-RPC (in-body
5182/// errors); **status 400** for malformed JSON; **status 401** when
5183/// auth is configured and the bearer check fails (handled by the
5184/// `auth_middleware` ahead of this handler).
5185///
5186/// Tenant resolution diverges from `solo mcp-stdio` here: stdio binds
5187/// one tenant at process start via `--tenant`. HTTP resolves the tenant
5188/// per request from the `X-Solo-Tenant` header (or
5189/// `AuthenticatedPrincipal.tenant_claim` in OIDC mode), so a single
5190/// daemon process can answer MCP calls for any tenant the registry
5191/// knows. The audit principal is `Some("bearer")` for bearer-
5192/// authenticated calls and the JWT `sub` for OIDC; `None` for
5193/// unauthenticated loopback. Documented in v0.10.2 dev log.
5194async fn mcp_http_post_handler(
5195    TenantExtractor(tenant): TenantExtractor,
5196    State(state): State<SoloHttpState>,
5197    AuditPrincipal(principal): AuditPrincipal,
5198    request: axum::extract::Request,
5199) -> Response {
5200    // v0.11.0 P1: read the session extension the middleware planted on
5201    // a hit; if absent, this is the session-init request — create one
5202    // and echo the assigned id back via `Mcp-Session-Id`.
5203    let existing_session_id: Option<crate::mcp_session::SessionId> = request
5204        .extensions()
5205        .get::<crate::mcp_session::SessionId>()
5206        .cloned();
5207    let principal_full = request
5208        .extensions()
5209        .get::<crate::auth::AuthenticatedPrincipal>()
5210        .cloned();
5211    let body_bytes = match axum::body::to_bytes(
5212        request.into_body(),
5213        // Match the 8 MiB cap solo-api already uses for JSON bodies in
5214        // other handlers (validated by `tower-http::limit::RequestBodyLimitLayer`
5215        // elsewhere). Locally we cap at 8 MiB so a malformed Content-Length
5216        // can't OOM the dispatch task.
5217        8 * 1024 * 1024,
5218    )
5219    .await
5220    {
5221        Ok(b) => b,
5222        Err(e) => {
5223            return (
5224                StatusCode::BAD_REQUEST,
5225                Json(serde_json::json!({
5226                    "error": format!("invalid request body: {e}"),
5227                    "status": 400,
5228                })),
5229            )
5230                .into_response();
5231        }
5232    };
5233    // Parse the JSON-RPC envelope. Malformed input ⇒ 400 (the spec
5234    // calls out 4xx for malformed wire input even though JSON-RPC's own
5235    // parse-error code is in-body — operator-facing tooling needs the
5236    // HTTP status to distinguish "the server rejected the request
5237    // shape" from "the method returned an error").
5238    let request: crate::mcp_dispatch::JsonRpcRequest = match serde_json::from_slice(&body_bytes) {
5239        Ok(r) => r,
5240        Err(e) => {
5241            return (
5242                StatusCode::BAD_REQUEST,
5243                Json(serde_json::json!({
5244                    "error": format!("invalid JSON-RPC request: {e}"),
5245                    "status": 400,
5246                })),
5247            )
5248                .into_response();
5249        }
5250    };
5251    if request.jsonrpc != "2.0" {
5252        return (
5253            StatusCode::BAD_REQUEST,
5254            Json(serde_json::json!({
5255                "error": format!(
5256                    "invalid JSON-RPC request: expected jsonrpc=\"2.0\", got {:?}",
5257                    request.jsonrpc
5258                ),
5259                "status": 400,
5260            })),
5261        )
5262            .into_response();
5263    }
5264
5265    // v0.11.0 P1: assign a session id if the request arrived without
5266    // one. The assigned id is echoed back via the `Mcp-Session-Id`
5267    // response header so the client can reuse it.
5268    let (session_id, freshly_assigned) = match existing_session_id {
5269        Some(id) => (id, false),
5270        None => {
5271            let new_state = crate::mcp_session::SessionState::new(
5272                tenant.tenant_id().clone(),
5273                principal_full,
5274            );
5275            let id = state.mcp_sessions.insert(new_state);
5276            (id, true)
5277        }
5278    };
5279
5280    // v0.11.0 P3: resolve the `Arc<SessionState>` for the dispatcher so
5281    // per-tool progress events can be published into the session's
5282    // broadcast channel. On a session-init request we just inserted
5283    // the state; for a continuing request the middleware planted an
5284    // Arc onto the request extensions, but we lost ownership when we
5285    // consumed the request above (`request.into_body()`). Re-fetch
5286    // via `mcp_sessions.get(&session_id)` — this is a single lock-free
5287    // DashMap shard read.
5288    let session_state: Option<std::sync::Arc<crate::mcp_session::SessionState>> =
5289        state.mcp_sessions.get(&session_id);
5290
5291    // v0.11.0 P4: on a freshly-assigned session, spawn the
5292    // invalidate-bridge task that forwards per-tenant `InvalidateEvent`
5293    // broadcasts to this session's event channel as MCP
5294    // `notifications/message` envelopes. Skipped for continuing requests
5295    // because the bridge spawned at session-init is still running (the
5296    // bridge auto-exits when the session drops from the store via the
5297    // `Weak<SessionState>` upgrade-fails path).
5298    if freshly_assigned
5299        && let Some(session_state_for_bridge) = session_state.clone()
5300    {
5301        // The JoinHandle is intentionally detached — the bridge task
5302        // owns its own exit path (Weak<SessionState> upgrade fails or
5303        // tenant broadcast closes). Holding the handle would require
5304        // a per-session reaper; the bridge's own lifecycle is enough.
5305        // `drop` is the clippy-clean way to discard a future.
5306        drop(crate::mcp_notify::spawn_invalidate_bridge(
5307            tenant.clone(),
5308            session_state_for_bridge,
5309        ));
5310    }
5311
5312    // Build the dispatcher with the resolved tenant + audit principal.
5313    // Dispatcher integration is Option B per v0.11.0 P1 plan: sessions
5314    // are HTTP-transport-only; the dispatcher stays session-agnostic.
5315    let dispatcher = crate::mcp_dispatch::McpDispatcher::new(
5316        state.registry.clone(),
5317        tenant,
5318        (*state.user_aliases).clone(),
5319        principal,
5320    );
5321
5322    let mut response = match dispatcher.dispatch(request, session_state).await {
5323        Some(response) => {
5324            // JSON-RPC errors are in-body; the HTTP status is 200 for
5325            // any valid JSON-RPC request, including ones that return an
5326            // error envelope. The client distinguishes success from
5327            // error by the presence of `result` vs `error` in the body.
5328            (StatusCode::OK, Json(response)).into_response()
5329        }
5330        None => {
5331            // Notification: per JSON-RPC 2.0 §4.1 the server MUST NOT
5332            // respond. The MCP Streamable HTTP transport spec uses
5333            // 202 Accepted for this shape so client-side polling does
5334            // not block on a body.
5335            StatusCode::ACCEPTED.into_response()
5336        }
5337    };
5338    // v0.11.0 P1: stamp the `Mcp-Session-Id` response header on every
5339    // response — both freshly-assigned (so the client learns it) and
5340    // continuing (so the client confirms the id is still valid). The
5341    // spec is loose here; echoing always is the safer client contract.
5342    crate::mcp_session::set_session_id_header(response.headers_mut(), &session_id);
5343    // Tracing hook lets operators see new-session creation rate in
5344    // `solo daemon` logs without grepping body bytes.
5345    if freshly_assigned {
5346        tracing::debug!(
5347            session_id = %session_id,
5348            "mcp-http: assigned new session id"
5349        );
5350    }
5351    response
5352}
5353
/// Heartbeat cadence, in seconds, for the resumable `GET /mcp` stream.
/// Matches the `/v1/graph/stream` discipline (30s) so operator tooling
/// can use one timeout knob.
///
/// The value is not read directly by the stream builder: since
/// v0.11.0 P2 `mcp_http_get_handler` passes it to
/// [`build_mcp_session_stream`] as the `heartbeat_secs` argument,
/// which lets tests supply a short interval instead of driving the
/// real clock for 30s.
pub const MCP_STREAM_HEARTBEAT_SECS: u64 = 30;
5360
5361/// `GET /mcp` — resumable Server-Sent Events stream for one MCP session.
5362///
5363/// v0.11.0 P2 replaces v0.10.2's `pending().await` stub with a real
5364/// `select!` loop over the session's broadcast event channel. Per the
5365/// MCP Streamable HTTP transport spec, the GET endpoint is the
5366/// server's path to push:
5367///
5368///   - `event: init` — handshake confirming the stream is live;
5369///   - `event: message` — JSON-RPC `notifications/message` (P4 bridge);
5370///   - `event: progress` — JSON-RPC `notifications/progress` (P3 long
5371///     tool calls);
5372///   - `event: heartbeat` — periodic liveness ping every
5373///     [`MCP_STREAM_HEARTBEAT_SECS`] seconds;
5374///   - `event: lagged` — emitted once when a reconnecting client's
5375///     `Last-Event-ID` is older than the broadcast buffer's oldest
5376///     retained event (Decision E).
5377///
5378/// Wire format per the SSE spec — each event carries:
5379/// `id: <u64>\nevent: <kind>\ndata: <json>\n\n`. The `id:` field is the
5380/// monotonic per-session event id; clients echo the last-seen value
5381/// back in the `Last-Event-ID` header on reconnect to drive the
5382/// replay-from-cursor path.
5383///
5384/// **Session id REQUIRED.** Unlike `POST /mcp` (which auto-creates a
5385/// session on the session-init request), `GET /mcp` returns `404 Not
5386/// Found` if the request arrived without a `Mcp-Session-Id` header.
5387/// The GET stream's whole point is to attach to an existing session's
5388/// notification channel — a client opening a stream without a session
5389/// to attach it to is a programming error, not the entry point to the
5390/// session lifecycle.
5391async fn mcp_http_get_handler(
5392    TenantExtractor(tenant): TenantExtractor,
5393    State(state): State<SoloHttpState>,
5394    AuditPrincipal(principal): AuditPrincipal,
5395    request: axum::extract::Request,
5396) -> Response {
5397    let _ = principal; // audit principal pre-resolved by extractor; unused on GET
5398    let _ = state; // session resolution lives in the middleware; state unused here
5399
5400    // v0.11.0 P2: session is REQUIRED on GET. The middleware planted
5401    // the SessionId + Arc<SessionState> extensions on a hit. If the
5402    // request arrived without an `Mcp-Session-Id` header, the
5403    // middleware passes through (so unauth'd POSTs can session-init);
5404    // we observe that as a missing extension and return 404 here.
5405    let session_id = match request.extensions().get::<crate::mcp_session::SessionId>() {
5406        Some(id) => id.clone(),
5407        None => {
5408            return (
5409                StatusCode::NOT_FOUND,
5410                Json(serde_json::json!({
5411                    "error": crate::mcp_session::MCP_SESSION_EXPIRED_ERROR,
5412                    "status": 404,
5413                    "message": "GET /mcp requires an `Mcp-Session-Id` header \
5414                                from a prior POST /mcp; open one first",
5415                    "retry": "re-initialize",
5416                })),
5417            )
5418                .into_response();
5419        }
5420    };
5421    let session_state = match request.extensions().get::<std::sync::Arc<crate::mcp_session::SessionState>>() {
5422        Some(state) => state.clone(),
5423        None => {
5424            // Defensive: middleware should plant both extensions
5425            // together or neither, but log + 404 if we somehow see one
5426            // without the other.
5427            tracing::error!(
5428                "mcp_http_get_handler: SessionId extension present but \
5429                 SessionState extension missing — middleware bug"
5430            );
5431            return StatusCode::INTERNAL_SERVER_ERROR.into_response();
5432        }
5433    };
5434
5435    // Optional `Last-Event-ID` header — parse as u64; on parse failure
5436    // treat as `0` (the "never seen anything" sentinel) so a
5437    // malformed header doesn't 400 the reconnect.
5438    let last_event_id: u64 = request
5439        .headers()
5440        .get(crate::mcp_session::MCP_LAST_EVENT_ID_HEADER)
5441        .and_then(|v| v.to_str().ok())
5442        .and_then(|s| s.trim().parse::<u64>().ok())
5443        .unwrap_or(0);
5444
5445    let tenant_id = tenant.tenant_id().to_string();
5446    let stream = build_mcp_session_stream(
5447        session_state,
5448        session_id.clone(),
5449        tenant_id,
5450        last_event_id,
5451        MCP_STREAM_HEARTBEAT_SECS,
5452    );
5453    // No axum keep-alive comment lines — we ship our own typed
5454    // `heartbeat` event the way `/v1/graph/stream` does. Setting the
5455    // axum-side interval to 1 hour effectively disables it; clients
5456    // distinguish liveness via the typed events on the stream.
5457    let sse = Sse::new(stream).keep_alive(KeepAlive::new().interval(Duration::from_secs(3600)));
5458    let mut response = sse.into_response();
5459    crate::mcp_session::set_session_id_header(response.headers_mut(), &session_id);
5460    response
5461}
5462
5463/// Per-subscriber state threaded through `futures::stream::unfold` for
5464/// the resumable `/mcp` GET stream. Carries the broadcast receiver +
5465/// heartbeat ticker + the queue of replay events (if any) that need
5466/// to drain before live events start flowing.
5467///
5468/// Holds an `Arc<SessionState>` for the SOLE purpose of keeping the
5469/// broadcast `Sender` alive for as long as this subscriber's stream
5470/// is open. Without that strong ref, dropping the only Arc the
5471/// handler held would close the channel and `rx.recv()` would
5472/// immediately return `Err(Closed)` — the integration test for the
5473/// heartbeat cadence caught this regression.
5474struct McpStreamState {
5475    /// Live event receiver subscribed BEFORE the replay snapshot is
5476    /// drained — so any event published during the replay window
5477    /// lands here and the subscriber's `last_replayed_id` cursor
5478    /// dedupes it against the replayed copy.
5479    rx: broadcast::Receiver<crate::mcp_session::McpStreamEvent>,
5480    /// Heartbeat tick — fires every `heartbeat_secs` regardless of
5481    /// real-event volume. Matches `/v1/graph/stream`'s discipline.
5482    heartbeat: tokio::time::Interval,
5483    /// FIFO queue of replay events still to emit before live events
5484    /// take over. Empties to `Vec::new()` after the last drain.
5485    replay_queue: Vec<crate::mcp_session::McpStreamEvent>,
5486    /// `Some(id)` once at least one event has been emitted (replayed
5487    /// OR live). Live broadcast events with `id <= last_emitted_id`
5488    /// are skipped — handles the race where an event lands in BOTH
5489    /// the replay snapshot AND the live broadcast receiver (because
5490    /// we subscribed before snapshotting).
5491    last_emitted_id: Option<u64>,
5492    /// `true` until the synthetic `event: init` has been emitted.
5493    /// Flipped to `false` on first poll.
5494    needs_init: bool,
5495    /// Init-event payload metadata. Pre-computed at handler entry so
5496    /// the unfold closure stays `Send`.
5497    session_id_str: String,
5498    tenant_id: String,
5499    /// Held only to keep the broadcast `Sender` (and thus the channel)
5500    /// alive for the stream's lifetime. The session store also holds
5501    /// an Arc, but that one expires under TTL — this Arc keeps the
5502    /// channel open for this single subscriber for as long as the
5503    /// client is connected.
5504    _session_state: std::sync::Arc<crate::mcp_session::SessionState>,
5505}
5506
5507/// Build the resumable SSE stream for one `/mcp` GET subscriber.
5508///
5509/// Flow per `unfold` iteration:
5510///
5511///   1. **needs_init poll** — emit one `event: init` with id 0 (we
5512///      never allocate event id 0 in `SessionState::publish_event`;
5513///      0 is reserved for the init event + the client's "never seen"
5514///      sentinel on `Last-Event-ID`). Returns immediately.
5515///   2. **replay drain** — while `replay_queue` is non-empty, pop the
5516///      front entry and emit it. Updates `last_emitted_id`.
5517///   3. **live select** — `tokio::select!` between
5518///      `rx.recv()` and `heartbeat.tick()`:
5519///      - `rx.recv() = Ok(event)` and `event.id > last_emitted_id` →
5520///        emit and update cursor;
5521///      - `rx.recv() = Ok(event)` and `event.id <= last_emitted_id` →
5522///        skip (dedupe overlap with the replayed copy);
5523///      - `rx.recv() = Err(Lagged(n))` → emit one synthetic
5524///        `event: lagged` with `data: {dropped: n}` and continue;
5525///      - `rx.recv() = Err(Closed)` → end the stream (session
5526///        dropped);
5527///      - `heartbeat.tick()` → emit an unaccounted-id `event: heartbeat`
5528///        (heartbeats DO NOT consume the session's event id space —
5529///        they're synthetic and idempotent, so a reconnecting client
5530///        doesn't need to see them in replay).
5531///
5532/// Heartbeats use SSE event id `0` (the same id space the init event
5533/// uses) and clients filter them client-side; the broadcast-channel
5534/// events use the session's real monotonic ids.
5535fn build_mcp_session_stream(
5536    session_state: std::sync::Arc<crate::mcp_session::SessionState>,
5537    session_id: crate::mcp_session::SessionId,
5538    tenant_id: String,
5539    last_event_id: u64,
5540    heartbeat_secs: u64,
5541) -> impl Stream<Item = Result<Event, Infallible>> {
5542    // 1. Subscribe BEFORE snapshotting so any event published during
5543    //    the snapshot window lands in the live receiver. We dedupe
5544    //    overlap against `last_emitted_id` below.
5545    let rx = session_state.subscribe_events();
5546
5547    // 2. Snapshot the replay buffer, then filter to events the client
5548    //    hasn't seen.
5549    let snapshot = session_state.snapshot_replay_buffer();
5550
5551    // 3. Decide replay shape based on `last_event_id` vs the snapshot.
5552    let mut replay_queue: Vec<crate::mcp_session::McpStreamEvent> = Vec::new();
5553    if last_event_id > 0 {
5554        // Client is reconnecting with a known cursor.
5555        let oldest_in_buffer = snapshot.first().map(|e| e.id);
5556        let newest_in_buffer = snapshot.last().map(|e| e.id);
5557        if let (Some(oldest), Some(newest)) = (oldest_in_buffer, newest_in_buffer) {
5558            if last_event_id + 1 < oldest {
5559                // Client missed events that have since been evicted
5560                // from the buffer. Emit one synthetic `event: lagged`
5561                // describing the gap, then resume from the buffer.
5562                let dropped = oldest.saturating_sub(last_event_id + 1);
5563                replay_queue.push(crate::mcp_session::McpStreamEvent {
5564                    id: 0,
5565                    event: crate::mcp_session::McpEventKind::Lagged,
5566                    data: serde_json::json!({
5567                        "dropped": dropped,
5568                        "last_event_id": last_event_id,
5569                        "oldest_available": oldest,
5570                    }),
5571                });
5572                replay_queue.extend(snapshot);
5573            } else if last_event_id >= newest {
5574                // Client is already caught up; nothing to replay.
5575            } else {
5576                replay_queue.extend(
5577                    snapshot
5578                        .into_iter()
5579                        .filter(|e| e.id > last_event_id),
5580                );
5581            }
5582        }
5583        // Empty snapshot + non-zero last_event_id: nothing to replay.
5584    }
5585    // last_event_id == 0: brand-new subscriber; no replay needed
5586    // (the `init` event below is the start of the stream from the
5587    // client's POV).
5588
5589    let start_at = tokio::time::Instant::now() + Duration::from_secs(heartbeat_secs);
5590    let heartbeat =
5591        tokio::time::interval_at(start_at, Duration::from_secs(heartbeat_secs));
5592
5593    let stream_state = McpStreamState {
5594        rx,
5595        heartbeat,
5596        replay_queue,
5597        last_emitted_id: None,
5598        needs_init: true,
5599        session_id_str: session_id.to_string(),
5600        tenant_id,
5601        _session_state: session_state,
5602    };
5603
5604    futures::stream::unfold(stream_state, move |mut state| async move {
5605        // Phase 1: init event (one-shot).
5606        if state.needs_init {
5607            state.needs_init = false;
5608            let init_payload = serde_json::json!({
5609                "connected": true,
5610                "session_id": state.session_id_str,
5611                "tenant_id": state.tenant_id,
5612                "ts_ms": chrono::Utc::now().timestamp_millis(),
5613            });
5614            let ev = build_mcp_sse_event(
5615                0,
5616                crate::mcp_session::McpEventKind::Init,
5617                &init_payload,
5618            );
5619            return Some((Ok::<Event, Infallible>(ev), state));
5620        }
5621        // Phase 2: replay-queue drain (one entry per poll).
5622        if !state.replay_queue.is_empty() {
5623            let entry = state.replay_queue.remove(0);
5624            // Lagged synthetic entries don't bump last_emitted_id —
5625            // they have id 0 and consuming them as the cursor would
5626            // cause every subsequent live event to dedupe against
5627            // them. Real events DO bump the cursor.
5628            if entry.event != crate::mcp_session::McpEventKind::Lagged {
5629                state.last_emitted_id = Some(entry.id);
5630            }
5631            let ev = build_mcp_sse_event(entry.id, entry.event, &entry.data);
5632            return Some((Ok::<Event, Infallible>(ev), state));
5633        }
5634        // Phase 3: live select loop.
5635        loop {
5636            tokio::select! {
5637                event = state.rx.recv() => {
5638                    match event {
5639                        Ok(ev) => {
5640                            // Dedupe against the replay overlap: any
5641                            // event whose id we've already emitted
5642                            // (because it was in the replay snapshot)
5643                            // gets skipped here.
5644                            if let Some(last) = state.last_emitted_id
5645                                && ev.id <= last
5646                            {
5647                                continue;
5648                            }
5649                            state.last_emitted_id = Some(ev.id);
5650                            let sse = build_mcp_sse_event(ev.id, ev.event, &ev.data);
5651                            return Some((Ok::<Event, Infallible>(sse), state));
5652                        }
5653                        Err(broadcast::error::RecvError::Lagged(n)) => {
5654                            // Live subscriber drifted past the
5655                            // broadcast buffer's capacity. Emit one
5656                            // synthetic `event: lagged` and resume —
5657                            // clients re-fetch state on this signal.
5658                            tracing::warn!(
5659                                lagged = n,
5660                                session_id = %state.session_id_str,
5661                                "mcp GET stream subscriber lagged"
5662                            );
5663                            let lagged_payload = serde_json::json!({
5664                                "dropped": n,
5665                            });
5666                            let sse = build_mcp_sse_event(
5667                                0,
5668                                crate::mcp_session::McpEventKind::Lagged,
5669                                &lagged_payload,
5670                            );
5671                            return Some((Ok::<Event, Infallible>(sse), state));
5672                        }
5673                        Err(broadcast::error::RecvError::Closed) => {
5674                            tracing::debug!(
5675                                session_id = %state.session_id_str,
5676                                "mcp GET stream broadcast closed; ending SSE stream"
5677                            );
5678                            return None;
5679                        }
5680                    }
5681                }
5682                _ = state.heartbeat.tick() => {
5683                    let hb_payload = serde_json::json!({
5684                        "ts_ms": chrono::Utc::now().timestamp_millis(),
5685                    });
5686                    let sse = build_mcp_sse_event(
5687                        0,
5688                        crate::mcp_session::McpEventKind::Heartbeat,
5689                        &hb_payload,
5690                    );
5691                    return Some((Ok::<Event, Infallible>(sse), state));
5692                }
5693            }
5694        }
5695    })
5696}
5697
5698/// Build an SSE [`Event`] from a `(id, kind, payload)` triple. Falls
5699/// back to an event-only frame on JSON serialisation failure (matches
5700/// `/v1/graph/stream`'s defensive pattern).
5701fn build_mcp_sse_event(
5702    id: u64,
5703    kind: crate::mcp_session::McpEventKind,
5704    data: &serde_json::Value,
5705) -> Event {
5706    Event::default()
5707        .id(id.to_string())
5708        .event(kind.as_str())
5709        .json_data(data)
5710        .unwrap_or_else(|_| Event::default().id(id.to_string()).event(kind.as_str()))
5711}
5712
5713// ---------------------------------------------------------------------------
5714// Error mapping
5715// ---------------------------------------------------------------------------
5716
/// Error type returned by the HTTP handlers: an HTTP status code paired
/// with a human-readable message. Rendered as a JSON body by its
/// `IntoResponse` impl.
#[derive(Debug)]
pub struct ApiError {
    /// HTTP status code sent with the response.
    status: StatusCode,
    /// Message placed in the `"error"` field of the JSON body.
    message: String,
}
5722
5723impl ApiError {
5724    fn bad_request(msg: impl Into<String>) -> Self {
5725        Self {
5726            status: StatusCode::BAD_REQUEST,
5727            message: msg.into(),
5728        }
5729    }
5730    fn not_found(msg: impl Into<String>) -> Self {
5731        Self {
5732            status: StatusCode::NOT_FOUND,
5733            message: msg.into(),
5734        }
5735    }
5736    fn internal(msg: impl Into<String>) -> Self {
5737        Self {
5738            status: StatusCode::INTERNAL_SERVER_ERROR,
5739            message: msg.into(),
5740        }
5741    }
5742}
5743
5744impl From<solo_core::Error> for ApiError {
5745    fn from(e: solo_core::Error) -> Self {
5746        use solo_core::Error;
5747        match e {
5748            Error::NotFound(msg) => ApiError::not_found(msg),
5749            Error::InvalidInput(msg) => ApiError::bad_request(msg),
5750            Error::Conflict(msg) => Self {
5751                status: StatusCode::CONFLICT,
5752                message: msg,
5753            },
5754            other => ApiError::internal(other.to_string()),
5755        }
5756    }
5757}
5758
5759impl IntoResponse for ApiError {
5760    fn into_response(self) -> Response {
5761        let body = serde_json::json!({
5762            "error": self.message,
5763            "status": self.status.as_u16(),
5764        });
5765        (self.status, Json(body)).into_response()
5766    }
5767}
5768
5769// SQL helper for recall used to live here; consolidated into
5770// solo_query::recall.
5771
5772#[cfg(test)]
5773mod handler_tests {
5774    //! In-process integration tests for the HTTP handler surface. We
5775    //! drive the axum Router directly via `tower::ServiceExt::oneshot`
5776    //! — no real TCP listener needed. Same `Harness`-shape as the MCP
5777    //! tests: real WriterActor + ReaderPool + StubEmbedder + StubVectorIndex.
5778    //!
5779    //! Tests live inline in this module rather than in a `tests/` dir
5780    //! because external integration-test exes triggered Windows UAC
5781    //! ERROR_ELEVATION_REQUIRED on the dev machine.
5782    use super::*;
5783    use axum::body::Body;
5784    use axum::http::{Request, StatusCode};
5785    use http_body_util::BodyExt;
5786    use serde_json::{Value, json};
5787    use solo_storage::test_support::StubVectorIndex;
5788    use solo_storage::{
5789        EmbedderConfig, IdentityConfig, KeyMaterial, ReaderPool, SoloConfig,
5790        StubEmbedder, TenantHandle, TenantRegistry, WriterActor, WriterSpawn,
5791    };
5792    use solo_core::VectorIndex;
5793    use std::sync::Arc as StdArc;
5794    use tower::ServiceExt;
5795
5796    fn fake_config(dim: u32) -> SoloConfig {
5797        SoloConfig {
5798            schema_version: 1,
5799            salt_hex: "00000000000000000000000000000000".to_string(),
5800            embedder: EmbedderConfig {
5801                name: "stub".to_string(),
5802                version: "v1".to_string(),
5803                dim,
5804                dtype: "f32".to_string(),
5805            },
5806            identity: IdentityConfig::default(),
5807            documents: solo_storage::DocumentConfig::default(),
5808            auth: None,
5809            audit: solo_storage::AuditSettings::default(),
5810            redaction: solo_storage::RedactionConfig::default(),
5811            llm: None,
5812            triples: solo_storage::TriplesConfig::default(),
5813            sampling: solo_storage::SamplingConfig::default(),
5814        }
5815    }
5816
    /// In-process test fixture: an axum router wired to a writer actor
    /// and reader pool over a temp-dir database, plus the handles that
    /// `shutdown` needs to tear everything down.
    struct Harness {
        /// Router under test; tests clone it for each `oneshot` request.
        router: axum::Router,
        /// Temp directory that holds the test database; owned here so it
        /// lives as long as the harness.
        _tmp: tempfile::TempDir,
        /// Path of the database file inside `_tmp`; used by `open_db`.
        db_path: std::path::PathBuf,
        /// The harness's own clone of the writer handle; taken and
        /// dropped in `shutdown`.
        write_handle_extra: Option<solo_storage::WriteHandle>,
        /// Join handle of the writer thread; `shutdown` waits on it.
        join: Option<std::thread::JoinHandle<()>>,
        /// v0.10.0: handle to the per-tenant TenantHandle so SSE-flavoured
        /// tests can call `harness.invalidate_sender().send(...)` to
        /// simulate writer-actor invalidations (or grab a Receiver via
        /// `.subscribe()` for subscriber-count assertions).
        tenant_handle: StdArc<TenantHandle>,
        /// v0.10.0: clone of the registry Arc so `/v1/tenants` tests can
        /// seed additional tenant rows into the in-memory tenants_index
        /// stub via `registry.with_index(|idx| idx.register(...))`.
        registry: StdArc<TenantRegistry>,
        /// v0.11.0 P1: clone of the per-process MCP session store so
        /// tests can simulate TTL eviction (`delete` an id) without
        /// having to drive the full 30-min inactivity clock.
        mcp_sessions: crate::mcp_session::SessionStore,
    }
5837
5838    impl Harness {
5839        /// v0.10.0: clone the per-tenant broadcast Sender so tests can
5840        /// fire `InvalidateEvent`s directly without going through the
5841        /// writer-actor. The harness's writer is spawned via
5842        /// `WriterActor::spawn_full` (legacy variant, no invalidate
5843        /// plumb) so writer-driven events won't reach SSE subscribers
5844        /// in tests — tests use this Sender to simulate them.
5845        fn invalidate_sender(&self) -> tokio::sync::broadcast::Sender<InvalidateEvent> {
5846            self.tenant_handle.invalidate_sender().clone()
5847        }
5848    }
5849
5850    impl Harness {
5851        fn new(runtime: &tokio::runtime::Runtime) -> Self {
5852            Self::new_with_auth(runtime, None)
5853        }
5854
5855        /// Open a fresh side connection against the harness's DB. Used
5856        /// by graph_expand tests to seed clusters / triples / documents
5857        /// directly (the writer-actor doesn't expose those write paths).
5858        fn open_db(&self) -> rusqlite::Connection {
5859            solo_storage::test_support::open_test_db_at(&self.db_path)
5860        }
5861
5862        fn new_with_auth(
5863            runtime: &tokio::runtime::Runtime,
5864            bearer_token: Option<String>,
5865        ) -> Self {
5866            Self::new_with_auth_config(
5867                runtime,
5868                bearer_token.map(|token| crate::auth::AuthConfig::Bearer { token }),
5869            )
5870        }
5871
5872        fn new_with_auth_config(
5873            runtime: &tokio::runtime::Runtime,
5874            auth: Option<crate::auth::AuthConfig>,
5875        ) -> Self {
5876            use solo_storage::embedder_registry::{EmbedderIdentity, get_or_insert_embedder_id};
5877
5878            let tmp = tempfile::TempDir::new().unwrap();
5879            let dim = 16usize;
5880            let hnsw: StdArc<dyn VectorIndex + Send + Sync> = StdArc::new(StubVectorIndex::new(dim));
5881            let embedder: StdArc<dyn solo_core::Embedder> =
5882                StdArc::new(StubEmbedder::new("stub", "v1", dim));
5883            let path = tmp.path().join("test.db");
5884
5885            let embedder_id = {
5886                let conn = solo_storage::test_support::open_test_db_at(&path);
5887                get_or_insert_embedder_id(
5888                    &conn,
5889                    &EmbedderIdentity {
5890                        name: "stub".into(),
5891                        version: "v1".into(),
5892                        dim: dim as u32,
5893                        dtype: "f32".into(),
5894                    },
5895                )
5896                .unwrap()
5897            };
5898
5899            let conn = solo_storage::test_support::open_test_db_at(&path);
5900            let WriterSpawn { handle, join } = WriterActor::spawn_full(
5901                conn,
5902                hnsw.clone(),
5903                tmp.path().to_path_buf(),
5904                embedder_id,
5905            );
5906            let pool: ReaderPool =
5907                runtime.block_on(async { ReaderPool::new(&path, None, hnsw.clone()).unwrap() });
5908
5909            // Build a TenantHandle from the assembled parts and wrap it
5910            // in a single-tenant test registry.
5911            let tenant_id = solo_core::TenantId::default_tenant();
5912            let tenant_handle = StdArc::new(
5913                TenantHandle::from_parts_for_tests(
5914                    tenant_id.clone(),
5915                    fake_config(dim as u32),
5916                    path.clone(),
5917                    tmp.path().to_path_buf(),
5918                    embedder_id,
5919                    hnsw,
5920                    embedder.clone(),
5921                    handle.clone(),
5922                    // The harness owns ANOTHER WriteHandle clone + the join.
5923                    // We give the TenantHandle a dummy join that immediately
5924                    // returns — it never gets joined because shutdown_all
5925                    // can't get exclusive Arc ownership when the harness
5926                    // also holds a writer clone.
5927                    std::thread::spawn(|| {}),
5928                    pool,
5929                ),
5930            );
5931            let tenant_handle_clone = tenant_handle.clone();
5932
5933            // Suppress the auto-spawned dummy thread by letting it finish.
5934            // We DON'T put the real `join` into the TenantHandle because
5935            // we keep our own clone of `handle` for the shutdown path.
5936            let key = KeyMaterial::from_bytes_for_tests([0u8; 32]);
5937            let registry = StdArc::new(TenantRegistry::for_tests_with_single_tenant(
5938                tmp.path().to_path_buf(),
5939                key,
5940                embedder,
5941                tenant_handle,
5942            ));
5943            let registry_clone = registry.clone();
5944
5945            // v0.11.0 P1: build the MCP session store inside the
5946            // harness runtime so the background sweep task's
5947            // `tokio::spawn` finds a runtime context. The store is
5948            // cheap to construct; the spawn happens once on `new()`.
5949            let mcp_sessions = runtime
5950                .block_on(async { crate::mcp_session::SessionStore::new() });
5951            let mcp_sessions_clone = mcp_sessions.clone();
5952            let state = SoloHttpState {
5953                registry,
5954                default_tenant: tenant_id,
5955                user_aliases: Arc::new(Vec::new()),
5956                mcp_sessions,
5957            };
5958            let router = router_with_auth_config(state, auth);
5959            Harness {
5960                router,
5961                _tmp: tmp,
5962                db_path: path,
5963                write_handle_extra: Some(handle),
5964                join: Some(join),
5965                tenant_handle: tenant_handle_clone,
5966                registry: registry_clone,
5967                mcp_sessions: mcp_sessions_clone,
5968            }
5969        }
5970
5971        fn shutdown(mut self, runtime: &tokio::runtime::Runtime) {
5972            let join = self.join.take();
5973            let extra = self.write_handle_extra.take();
5974            // v0.10.0: the new `tenant_handle` Harness field holds another
5975            // `Arc<TenantHandle>` that owns its own WriteHandle clone.
5976            // We must drop our reference here so the inner WriteHandle
5977            // can be released when the registry drops below. Without
5978            // this, the writer thread's mpsc never closes and the join
5979            // times out at 5s.
5980            let tenant_handle = self.tenant_handle;
5981            // v0.10.0: same story for the new `registry` Arc clone the
5982            // tenants-list tests use to seed extra index rows — the
5983            // state inside the router holds one Arc, this is the
5984            // other; both must drop before the underlying registry
5985            // dies and releases its index-mutex / cached handles.
5986            let registry = self.registry;
5987            runtime.block_on(async move {
5988                drop(extra);
5989                drop(tenant_handle); // drop Harness's direct tenant Arc
5990                drop(registry); // drop Harness's direct registry Arc
5991                drop(self.router); // drops state → drops pool inside runtime ctx
5992                drop(self._tmp);
5993                if let Some(join) = join {
5994                    let (tx, rx) = std::sync::mpsc::channel();
5995                    std::thread::spawn(move || {
5996                        let _ = tx.send(join.join());
5997                    });
5998                    tokio::task::spawn_blocking(move || {
5999                        rx.recv_timeout(std::time::Duration::from_secs(5))
6000                    })
6001                    .await
6002                    .expect("blocking task")
6003                    .expect("writer thread did not exit within 5s")
6004                    .expect("writer thread panicked");
6005                }
6006            });
6007        }
6008    }
6009
6010    fn rt() -> tokio::runtime::Runtime {
6011        tokio::runtime::Builder::new_multi_thread()
6012            .worker_threads(2)
6013            .enable_all()
6014            .build()
6015            .unwrap()
6016    }
6017
6018    /// Issue one HTTP request through the router and capture status +
6019    /// JSON body. `body` may be `None` for GET/DELETE; `auth` adds an
6020    /// `Authorization` header value verbatim (e.g. `"Bearer xyz"`).
6021    async fn call(
6022        router: axum::Router,
6023        method: &str,
6024        uri: &str,
6025        body: Option<Value>,
6026    ) -> (StatusCode, Value) {
6027        call_with_auth(router, method, uri, body, None).await
6028    }
6029
6030    async fn call_with_auth(
6031        router: axum::Router,
6032        method: &str,
6033        uri: &str,
6034        body: Option<Value>,
6035        auth: Option<&str>,
6036    ) -> (StatusCode, Value) {
6037        let mut req_builder = Request::builder()
6038            .method(method)
6039            .uri(uri)
6040            .header("content-type", "application/json");
6041        if let Some(a) = auth {
6042            req_builder = req_builder.header("authorization", a);
6043        }
6044        let req = if let Some(b) = body {
6045            let bytes = serde_json::to_vec(&b).unwrap();
6046            req_builder.body(Body::from(bytes)).unwrap()
6047        } else {
6048            req_builder = req_builder.header("content-length", "0");
6049            req_builder.body(Body::empty()).unwrap()
6050        };
6051        let resp = router.oneshot(req).await.expect("oneshot");
6052        let status = resp.status();
6053        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
6054        let v: Value = if body_bytes.is_empty() {
6055            Value::Null
6056        } else {
6057            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
6058        };
6059        (status, v)
6060    }
6061
6062    #[test]
6063    fn health_returns_ok() {
6064        let runtime = rt();
6065        let h = Harness::new(&runtime);
6066        let r = h.router.clone();
6067        let (status, _body) = runtime.block_on(call(r, "GET", "/health", None));
6068        assert_eq!(status, StatusCode::OK);
6069        h.shutdown(&runtime);
6070    }
6071
6072    /// `GET /openapi.json` returns a parseable OpenAPI 3.x document with
6073    /// the four `memory.*` endpoints + their request/response schemas.
6074    /// Acts as a drift detector: if a future commit adds/removes a route
6075    /// without updating `openapi_spec`, this test fails loudly.
6076    #[test]
6077    fn openapi_json_describes_all_endpoints() {
6078        let runtime = rt();
6079        let h = Harness::new(&runtime);
6080        let r = h.router.clone();
6081        let (status, spec) = runtime.block_on(call(r, "GET", "/openapi.json", None));
6082        assert_eq!(status, StatusCode::OK);
6083        assert!(spec.is_object(), "openapi.json must be a JSON object");
6084
6085        // Top-level shape per OpenAPI 3.1.
6086        assert!(
6087            spec.get("openapi")
6088                .and_then(|v| v.as_str())
6089                .is_some_and(|s| s.starts_with("3.")),
6090            "missing or wrong openapi version: {spec}"
6091        );
6092        assert!(spec.pointer("/info/title").is_some());
6093        assert!(spec.pointer("/info/version").is_some());
6094
6095        // Every route the router serves must be documented.
6096        let paths = spec
6097            .get("paths")
6098            .and_then(|v| v.as_object())
6099            .expect("paths must be an object");
6100        for expected in [
6101            "/health",
6102            "/openapi.json",
6103            "/memory",
6104            "/memory/search",
6105            "/memory/consolidate",
6106            "/memory/{id}",
6107            // Path 1 derived-layer endpoints (v0.4.0+):
6108            "/memory/themes",
6109            "/memory/facts_about",
6110            "/memory/contradictions",
6111            // v0.5.0 Priority 3:
6112            "/memory/clusters/{cluster_id}",
6113            // v0.7.0 P6 — document operations:
6114            "/memory/documents",
6115            "/memory/documents/search",
6116            "/memory/documents/{id}",
6117        ] {
6118            assert!(
6119                paths.contains_key(expected),
6120                "openapi paths missing {expected}: {paths:?}"
6121            );
6122        }
6123
6124        // Method coverage on /memory/documents: must document both POST
6125        // (ingest) and GET (list).
6126        let docs = paths.get("/memory/documents").expect("/memory/documents");
6127        assert!(docs.get("post").is_some(), "POST /memory/documents undocumented");
6128        assert!(docs.get("get").is_some(), "GET /memory/documents undocumented");
6129
6130        // Method coverage on /memory/documents/{id}: must document both
6131        // GET (inspect) and DELETE (forget).
6132        let docid = paths
6133            .get("/memory/documents/{id}")
6134            .expect("/memory/documents/{id}");
6135        assert!(
6136            docid.get("get").is_some(),
6137            "GET /memory/documents/{{id}} undocumented"
6138        );
6139        assert!(
6140            docid.get("delete").is_some(),
6141            "DELETE /memory/documents/{{id}} undocumented"
6142        );
6143
6144        // Method coverage on /memory/{id}: must document both GET (inspect)
6145        // and DELETE (forget).
6146        let memid = paths.get("/memory/{id}").expect("memory/{id}");
6147        assert!(memid.get("get").is_some(), "GET /memory/{{id}} undocumented");
6148        assert!(
6149            memid.get("delete").is_some(),
6150            "DELETE /memory/{{id}} undocumented"
6151        );
6152
6153        // Component schemas referenced from paths must be defined.
6154        for schema_name in [
6155            "RememberRequest",
6156            "RememberResponse",
6157            "RecallRequest",
6158            "RecallResult",
6159            "EpisodeRecord",
6160            "ApiError",
6161            "ConsolidationScope",
6162            "ConsolidationReport",
6163            // Path 1 derived-layer schemas (v0.4.0+):
6164            "ThemeHit",
6165            "FactHit",
6166            "ContradictionHit",
6167            // v0.5.0 Priority 3:
6168            "ClusterRecord",
6169            // v0.7.0 P6 — document schemas:
6170            "IngestDocumentRequest",
6171            "IngestReport",
6172            "ForgetDocumentReport",
6173            "SearchDocsRequest",
6174            "DocSearchHit",
6175            "DocumentInspectResult",
6176            "DocumentSummary",
6177        ] {
6178            let ptr = format!("/components/schemas/{schema_name}");
6179            assert!(
6180                spec.pointer(&ptr).is_some(),
6181                "component schema {schema_name} missing"
6182            );
6183        }
6184
6185        // bearerAuth security scheme is declared (LAN deployments need it).
6186        assert!(
6187            spec.pointer("/components/securitySchemes/bearerAuth")
6188                .is_some(),
6189            "bearerAuth security scheme missing"
6190        );
6191
6192        h.shutdown(&runtime);
6193    }
6194
6195    /// `/openapi.json` must remain unauthenticated even when bearer auth
6196    /// is enabled — the spec describes the API shape, not secrets, and
6197    /// codegen tooling shouldn't need a credential to fetch it.
6198    #[test]
6199    fn openapi_json_is_exempt_from_bearer_auth() {
6200        let runtime = rt();
6201        let h = Harness::new_with_auth(&runtime, Some("super-secret".into()));
6202        let r = h.router.clone();
6203        // No Authorization header → still 200 for /openapi.json.
6204        let (status, _body) = runtime.block_on(call(r, "GET", "/openapi.json", None));
6205        assert_eq!(status, StatusCode::OK);
6206        h.shutdown(&runtime);
6207    }
6208
6209    #[test]
6210    fn remember_returns_memory_id() {
6211        let runtime = rt();
6212        let h = Harness::new(&runtime);
6213        let r = h.router.clone();
6214        let (status, body) = runtime.block_on(call(
6215            r,
6216            "POST",
6217            "/memory",
6218            Some(json!({ "content": "http harness test" })),
6219        ));
6220        assert_eq!(status, StatusCode::OK);
6221        let mid = body.get("memory_id").and_then(|v| v.as_str()).unwrap();
6222        assert_eq!(mid.len(), 36, "uuid length");
6223        h.shutdown(&runtime);
6224    }
6225
6226    #[test]
6227    fn empty_content_returns_400() {
6228        let runtime = rt();
6229        let h = Harness::new(&runtime);
6230        let r = h.router.clone();
6231        let (status, body) =
6232            runtime.block_on(call(r, "POST", "/memory", Some(json!({ "content": "" }))));
6233        assert_eq!(status, StatusCode::BAD_REQUEST);
6234        assert!(
6235            body.get("error")
6236                .and_then(|e| e.as_str())
6237                .map(|s| s.contains("must not be empty"))
6238                .unwrap_or(false),
6239            "got: {body}"
6240        );
6241        h.shutdown(&runtime);
6242    }
6243
6244    #[test]
6245    fn empty_query_returns_400() {
6246        let runtime = rt();
6247        let h = Harness::new(&runtime);
6248        let r = h.router.clone();
6249        let (status, body) = runtime.block_on(call(
6250            r,
6251            "POST",
6252            "/memory/search",
6253            Some(json!({ "query": "" })),
6254        ));
6255        assert_eq!(status, StatusCode::BAD_REQUEST);
6256        assert!(
6257            body.get("error")
6258                .and_then(|e| e.as_str())
6259                .map(|s| s.contains("must not be empty"))
6260                .unwrap_or(false),
6261            "got: {body}"
6262        );
6263        h.shutdown(&runtime);
6264    }
6265
6266    #[test]
6267    fn inspect_unknown_returns_404() {
6268        let runtime = rt();
6269        let h = Harness::new(&runtime);
6270        let r = h.router.clone();
6271        let (status, body) = runtime.block_on(call(
6272            r,
6273            "GET",
6274            "/memory/00000000-0000-7000-8000-000000000000",
6275            None,
6276        ));
6277        assert_eq!(status, StatusCode::NOT_FOUND);
6278        assert!(body.get("error").is_some(), "got: {body}");
6279        h.shutdown(&runtime);
6280    }
6281
6282    #[test]
6283    fn inspect_invalid_id_returns_400() {
6284        let runtime = rt();
6285        let h = Harness::new(&runtime);
6286        let r = h.router.clone();
6287        let (status, _body) = runtime.block_on(call(r, "GET", "/memory/not-a-uuid", None));
6288        assert_eq!(status, StatusCode::BAD_REQUEST);
6289        h.shutdown(&runtime);
6290    }
6291
6292    #[test]
6293    fn forget_unknown_returns_404() {
6294        let runtime = rt();
6295        let h = Harness::new(&runtime);
6296        let r = h.router.clone();
6297        let (status, _body) = runtime.block_on(call(
6298            r,
6299            "DELETE",
6300            "/memory/00000000-0000-7000-8000-000000000000",
6301            None,
6302        ));
6303        assert_eq!(status, StatusCode::NOT_FOUND);
6304        h.shutdown(&runtime);
6305    }
6306
6307    /// `POST /memory/consolidate` runs the cluster pass and returns
6308    /// the report as JSON. With an empty body, `ConsolidationScope`
6309    /// defaults to unbounded; with a non-empty body, the
6310    /// `window_days` field is honored. The Harness's writer is
6311    /// spawned without a Steward, so `abstractions_built` stays 0
6312    /// even when `clusters_built` is nonzero — same posture as the
6313    /// daemon today.
6314    #[test]
6315    fn consolidate_endpoint_returns_report() {
6316        let runtime = rt();
6317        let h = Harness::new(&runtime);
6318        let r = h.router.clone();
6319        runtime.block_on(async move {
6320            // Empty DB → all-zero report; structural assertion only.
6321            let (status, body) = call(r.clone(), "POST", "/memory/consolidate", None).await;
6322            assert_eq!(status, StatusCode::OK);
6323            for field in [
6324                "episodes_seen",
6325                "clusters_built",
6326                "episodes_clustered",
6327                "abstractions_built",
6328                "triples_built",
6329                "contradictions_found",
6330            ] {
6331                assert!(
6332                    body.get(field).and_then(|v| v.as_u64()).is_some(),
6333                    "missing field {field}: {body}"
6334                );
6335            }
6336            assert_eq!(body["episodes_seen"], 0);
6337            assert_eq!(body["clusters_built"], 0);
6338
6339            // Non-empty body with window_days → still 200; unmistakable
6340            // shape round-trips through ConsolidationScope's serde.
6341            let (status2, _body2) = call(
6342                r,
6343                "POST",
6344                "/memory/consolidate",
6345                Some(json!({ "window_days": 7 })),
6346            )
6347            .await;
6348            assert_eq!(status2, StatusCode::OK);
6349        });
6350        h.shutdown(&runtime);
6351    }
6352
6353    #[test]
6354    fn auth_required_routes_reject_missing_token() {
6355        let runtime = rt();
6356        let h = Harness::new_with_auth(&runtime, Some("secret-xyz".into()));
6357        let r = h.router.clone();
6358        runtime.block_on(async move {
6359            // No Authorization header → 401.
6360            let (status, _body) = call(
6361                r.clone(),
6362                "POST",
6363                "/memory",
6364                Some(json!({ "content": "x" })),
6365            )
6366            .await;
6367            assert_eq!(status, StatusCode::UNAUTHORIZED);
6368
6369            // Wrong token → 401.
6370            let (status, _body) = call_with_auth(
6371                r.clone(),
6372                "POST",
6373                "/memory",
6374                Some(json!({ "content": "x" })),
6375                Some("Bearer wrong-token"),
6376            )
6377            .await;
6378            assert_eq!(status, StatusCode::UNAUTHORIZED);
6379
6380            // Correct token → handler runs (200).
6381            let (status, body) = call_with_auth(
6382                r.clone(),
6383                "POST",
6384                "/memory",
6385                Some(json!({ "content": "authed" })),
6386                Some("Bearer secret-xyz"),
6387            )
6388            .await;
6389            assert_eq!(status, StatusCode::OK);
6390            assert!(body.get("memory_id").is_some());
6391        });
6392        h.shutdown(&runtime);
6393    }
6394
6395    #[test]
6396    fn health_endpoint_does_not_require_auth() {
6397        let runtime = rt();
6398        let h = Harness::new_with_auth(&runtime, Some("secret".into()));
6399        let r = h.router.clone();
6400        let (status, _body) = runtime.block_on(call(r, "GET", "/health", None));
6401        // Liveness probes should work without credentials.
6402        assert_eq!(status, StatusCode::OK);
6403        h.shutdown(&runtime);
6404    }
6405
6406    #[test]
6407    fn auth_response_includes_www_authenticate_header() {
6408        // Verify the WWW-Authenticate hint that lets a well-behaved
6409        // client know it's a bearer-auth scheme. We check via raw
6410        // request → response (oneshot returns Response, but our
6411        // call() helper drops the headers; build the request manually).
6412        let runtime = rt();
6413        let h = Harness::new_with_auth(&runtime, Some("secret".into()));
6414        let r = h.router.clone();
6415        runtime.block_on(async move {
6416            let req = Request::builder()
6417                .method("POST")
6418                .uri("/memory")
6419                .header("content-type", "application/json")
6420                .body(Body::from(serde_json::to_vec(&json!({ "content": "x" })).unwrap()))
6421                .unwrap();
6422            let resp = r.oneshot(req).await.unwrap();
6423            assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
6424            let www = resp
6425                .headers()
6426                .get("www-authenticate")
6427                .and_then(|v| v.to_str().ok())
6428                .unwrap_or("");
6429            assert!(
6430                www.starts_with("Bearer"),
6431                "expected WWW-Authenticate: Bearer..., got: {www}"
6432            );
6433        });
6434        h.shutdown(&runtime);
6435    }
6436
6437    // ---------------------------------------------------------------------
6438    // v0.8.0 P3: OIDC end-to-end. Spin up a fake IdP (wiremock) that
6439    // serves an OIDC discovery doc + JWKS, mint a token claiming
6440    // `solo_tenant = "default"`, and verify it routes through the
6441    // middleware + TenantExtractor + handler.
6442    // ---------------------------------------------------------------------
6443
6444    fn base64_url_for_test(bytes: &[u8]) -> String {
6445        use base64::Engine;
6446        base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(bytes)
6447    }
6448
6449    /// Spin up a single-purpose fake OIDC IdP for these tests. Returns
6450    /// (mock_server, discovery_url, secret, kid).
6451    async fn spin_fake_idp() -> (wiremock::MockServer, String, Vec<u8>, &'static str) {
6452        use wiremock::matchers::{method, path};
6453        use wiremock::{Mock, MockServer, ResponseTemplate};
6454        let server = MockServer::start().await;
6455        let secret = b"http-test-secret-for-hmac-fixture".to_vec();
6456        let kid = "http-test-kid";
6457        let discovery = serde_json::json!({
6458            "issuer": server.uri(),
6459            "jwks_uri": format!("{}/jwks", server.uri()),
6460        });
6461        Mock::given(method("GET"))
6462            .and(path("/.well-known/openid-configuration"))
6463            .respond_with(ResponseTemplate::new(200).set_body_json(discovery))
6464            .mount(&server)
6465            .await;
6466        let jwks = serde_json::json!({
6467            "keys": [
6468                {
6469                    "kty": "oct",
6470                    "kid": kid,
6471                    "alg": "HS256",
6472                    "k": base64_url_for_test(&secret),
6473                }
6474            ]
6475        });
6476        Mock::given(method("GET"))
6477            .and(path("/jwks"))
6478            .respond_with(ResponseTemplate::new(200).set_body_json(jwks))
6479            .mount(&server)
6480            .await;
6481        let discovery_url = format!("{}/.well-known/openid-configuration", server.uri());
6482        (server, discovery_url, secret, kid)
6483    }
6484
6485    fn mint_idp_token(
6486        server_uri: &str,
6487        kid: &str,
6488        secret: &[u8],
6489        tenant_claim: &str,
6490        audience: &str,
6491    ) -> String {
6492        use jsonwebtoken::{Algorithm, EncodingKey, Header};
6493        let mut header = Header::new(Algorithm::HS256);
6494        header.kid = Some(kid.to_string());
6495        let now = std::time::SystemTime::now()
6496            .duration_since(std::time::UNIX_EPOCH)
6497            .unwrap()
6498            .as_secs();
6499        let claims = serde_json::json!({
6500            "iss": server_uri,
6501            "sub": "test-user-1",
6502            "aud": audience,
6503            "exp": now + 600,
6504            "iat": now,
6505            "solo_tenant": tenant_claim,
6506        });
6507        jsonwebtoken::encode(&header, &claims, &EncodingKey::from_secret(secret))
6508            .expect("mint token")
6509    }
6510
6511    #[test]
6512    fn http_oidc_accept_resolves_to_tenant_from_claim() {
6513        let runtime = rt();
6514        let (fake_server, discovery_url, secret, kid) =
6515            runtime.block_on(async { spin_fake_idp().await });
6516        let server_uri = fake_server.uri();
6517        // Keep the wiremock server alive for the duration of this test.
6518        let _server_guard = fake_server;
6519
6520        let auth = crate::auth::AuthConfig::Oidc {
6521            discovery_url,
6522            audience: "test-audience".to_string(),
6523            tenant_claim_name: "solo_tenant".to_string(),
6524        };
6525        let h = Harness::new_with_auth_config(&runtime, Some(auth));
6526        let r = h.router.clone();
6527
6528        // Mint a token claiming the harness's default tenant.
6529        let token = mint_idp_token(
6530            &server_uri,
6531            kid,
6532            &secret,
6533            "default",
6534            "test-audience",
6535        );
6536
6537        runtime.block_on(async move {
6538            // POST /memory with a valid OIDC token → handler runs, returns memory_id.
6539            let (status, body) = call_with_auth(
6540                r.clone(),
6541                "POST",
6542                "/memory",
6543                Some(json!({ "content": "oidc-routed content" })),
6544                Some(&format!("Bearer {token}")),
6545            )
6546            .await;
6547            assert_eq!(status, StatusCode::OK, "got body: {body}");
6548            assert!(body.get("memory_id").is_some(), "no memory_id in {body}");
6549        });
6550        h.shutdown(&runtime);
6551    }
6552
6553    #[test]
6554    fn http_oidc_reject_missing_token_returns_401() {
6555        let runtime = rt();
6556        let (fake_server, discovery_url, _secret, _kid) =
6557            runtime.block_on(async { spin_fake_idp().await });
6558        let _server_guard = fake_server;
6559        let auth = crate::auth::AuthConfig::Oidc {
6560            discovery_url,
6561            audience: "test-audience".to_string(),
6562            tenant_claim_name: "solo_tenant".to_string(),
6563        };
6564        let h = Harness::new_with_auth_config(&runtime, Some(auth));
6565        let r = h.router.clone();
6566        runtime.block_on(async move {
6567            // No Authorization header.
6568            let (status, _body) =
6569                call(r.clone(), "POST", "/memory", Some(json!({ "content": "x" }))).await;
6570            assert_eq!(status, StatusCode::UNAUTHORIZED);
6571
6572            // Garbage token → 401 (invalid signature / not a JWT).
6573            let (status, _body) = call_with_auth(
6574                r.clone(),
6575                "POST",
6576                "/memory",
6577                Some(json!({ "content": "x" })),
6578                Some("Bearer not-a-real-jwt"),
6579            )
6580            .await;
6581            assert_eq!(status, StatusCode::UNAUTHORIZED);
6582        });
6583        h.shutdown(&runtime);
6584    }
6585
6586    #[test]
6587    fn full_remember_recall_inspect_forget_round_trip() {
6588        let runtime = rt();
6589        let h = Harness::new(&runtime);
6590        let r = h.router.clone();
6591        runtime.block_on(async move {
6592            // POST /memory
6593            let (status, body) = call(
6594                r.clone(),
6595                "POST",
6596                "/memory",
6597                Some(json!({ "content": "round-trip content" })),
6598            )
6599            .await;
6600            assert_eq!(status, StatusCode::OK);
6601            let mid = body
6602                .get("memory_id")
6603                .and_then(|v| v.as_str())
6604                .unwrap()
6605                .to_string();
6606
6607            // POST /memory/search — exact-match (StubEmbedder) returns the row.
6608            let (status, body) = call(
6609                r.clone(),
6610                "POST",
6611                "/memory/search",
6612                Some(json!({ "query": "round-trip content", "limit": 5 })),
6613            )
6614            .await;
6615            assert_eq!(status, StatusCode::OK);
6616            let hits = body.get("hits").and_then(|v| v.as_array()).unwrap();
6617            assert!(
6618                hits.iter()
6619                    .any(|h| h.get("content").and_then(|c| c.as_str())
6620                        == Some("round-trip content")),
6621                "expected hit with content; got: {body}"
6622            );
6623
6624            // GET /memory/{id}
6625            let (status, body) = call(r.clone(), "GET", &format!("/memory/{mid}"), None).await;
6626            assert_eq!(status, StatusCode::OK);
6627            assert_eq!(body.get("status").and_then(|v| v.as_str()), Some("active"));
6628
6629            // DELETE /memory/{id}
6630            let (status, _body) =
6631                call(r.clone(), "DELETE", &format!("/memory/{mid}"), None).await;
6632            assert_eq!(status, StatusCode::NO_CONTENT);
6633
6634            // GET again — still readable but status='forgotten'
6635            let (status, body) = call(r.clone(), "GET", &format!("/memory/{mid}"), None).await;
6636            assert_eq!(status, StatusCode::OK);
6637            assert_eq!(
6638                body.get("status").and_then(|v| v.as_str()),
6639                Some("forgotten")
6640            );
6641
6642            // POST /memory/search — forgotten row excluded.
6643            let (status, body) = call(
6644                r.clone(),
6645                "POST",
6646                "/memory/search",
6647                Some(json!({ "query": "round-trip content", "limit": 5 })),
6648            )
6649            .await;
6650            assert_eq!(status, StatusCode::OK);
6651            let hits = body.get("hits").and_then(|v| v.as_array()).unwrap();
6652            assert!(
6653                hits.iter().all(|h| h.get("memory_id").and_then(|m| m.as_str())
6654                    != Some(mid.as_str())),
6655                "forgotten row should be excluded from recall: {body}"
6656            );
6657        });
6658        h.shutdown(&runtime);
6659    }
6660
6661    // Path 1 derived-layer endpoint tests (v0.4.0+). Wire-path only —
6662    // the actual content correctness is covered by solo-query::derived's
6663    // own tests (Sub-task A). These verify the HTTP shape: GET routing,
6664    // Query-string param parsing, JSON-array response body, validation
6665    // 400s for invalid inputs.
6666
6667    #[test]
6668    fn themes_endpoint_returns_empty_array_on_empty_db() {
6669        let runtime = rt();
6670        let h = Harness::new(&runtime);
6671        let r = h.router.clone();
6672        let (status, body) =
6673            runtime.block_on(call(r, "GET", "/memory/themes", None));
6674        assert_eq!(status, StatusCode::OK);
6675        assert!(body.is_array(), "expected array, got {body}");
6676        assert_eq!(body.as_array().unwrap().len(), 0);
6677        h.shutdown(&runtime);
6678    }
6679
6680    #[test]
6681    fn themes_endpoint_passes_through_query_params() {
6682        let runtime = rt();
6683        let h = Harness::new(&runtime);
6684        let r = h.router.clone();
6685        let (status, body) = runtime.block_on(call(
6686            r,
6687            "GET",
6688            "/memory/themes?window_days=7&limit=20",
6689            None,
6690        ));
6691        assert_eq!(status, StatusCode::OK);
6692        assert!(body.is_array(), "expected array, got {body}");
6693        h.shutdown(&runtime);
6694    }
6695
6696    #[test]
6697    fn facts_about_endpoint_requires_subject() {
6698        let runtime = rt();
6699        let h = Harness::new(&runtime);
6700        let r = h.router.clone();
6701        // Missing subject — axum's Query extractor 422 (Unprocessable
6702        // Entity) on missing required field; some axum versions
6703        // surface as 400. Accept either.
6704        let (status, _body) =
6705            runtime.block_on(call(r, "GET", "/memory/facts_about", None));
6706        assert!(
6707            status == StatusCode::BAD_REQUEST
6708                || status == StatusCode::UNPROCESSABLE_ENTITY,
6709            "expected 400 or 422 for missing subject, got {status}"
6710        );
6711        h.shutdown(&runtime);
6712    }
6713
6714    #[test]
6715    fn facts_about_endpoint_rejects_blank_subject() {
6716        let runtime = rt();
6717        let h = Harness::new(&runtime);
6718        let r = h.router.clone();
6719        // Whitespace-only subject reaches the handler then trips its
6720        // own validation → ApiError::bad_request → 400.
6721        let (status, body) = runtime.block_on(call(
6722            r,
6723            "GET",
6724            "/memory/facts_about?subject=%20%20",
6725            None,
6726        ));
6727        assert_eq!(status, StatusCode::BAD_REQUEST);
6728        assert!(
6729            body.get("error")
6730                .and_then(|v| v.as_str())
6731                .is_some_and(|s| s.contains("subject")),
6732            "expected error mentioning subject, got {body}"
6733        );
6734        h.shutdown(&runtime);
6735    }
6736
6737    #[test]
6738    fn facts_about_endpoint_returns_empty_array_for_unknown_subject() {
6739        let runtime = rt();
6740        let h = Harness::new(&runtime);
6741        let r = h.router.clone();
6742        let (status, body) = runtime.block_on(call(
6743            r,
6744            "GET",
6745            "/memory/facts_about?subject=NobodyKnows",
6746            None,
6747        ));
6748        assert_eq!(status, StatusCode::OK);
6749        assert_eq!(body.as_array().unwrap().len(), 0);
6750        h.shutdown(&runtime);
6751    }
6752
6753    #[test]
6754    fn facts_about_endpoint_parses_include_as_object_query_param() {
6755        // v0.5.1 P8: `?include_as_object=true` must parse cleanly
6756        // through the `Query<FactsAboutQuery>` extractor. If the
6757        // struct field is missing or wrongly typed, axum returns
6758        // 400/422 before reaching the handler. We don't seed
6759        // triples; we only need the request to reach the handler
6760        // and produce a normal 200 + empty array. Mirrors
6761        // `inspect_cluster_endpoint_passes_full_content_query_param`.
6762        let runtime = rt();
6763        let h = Harness::new(&runtime);
6764        let r = h.router.clone();
6765        let (status, body) = runtime.block_on(call(
6766            r,
6767            "GET",
6768            "/memory/facts_about?subject=Maya&include_as_object=true",
6769            None,
6770        ));
6771        assert_eq!(
6772            status,
6773            StatusCode::OK,
6774            "expected 200 with include_as_object query param, got {status}"
6775        );
6776        assert!(body.is_array());
6777        h.shutdown(&runtime);
6778    }
6779
6780    #[test]
6781    fn inspect_cluster_endpoint_unknown_id_returns_404() {
6782        // Maps `Error::NotFound` from `solo_query::inspect_cluster`
6783        // through `ApiError::from` → 404. Mirrors the unknown-memory
6784        // case for `GET /memory/{id}`.
6785        let runtime = rt();
6786        let h = Harness::new(&runtime);
6787        let r = h.router.clone();
6788        let (status, body) = runtime.block_on(call(
6789            r,
6790            "GET",
6791            "/memory/clusters/no-such-cluster",
6792            None,
6793        ));
6794        assert_eq!(status, StatusCode::NOT_FOUND);
6795        assert!(
6796            body.get("error")
6797                .and_then(|v| v.as_str())
6798                .is_some_and(|s| s.contains("no-such-cluster")),
6799            "expected error mentioning cluster id, got {body}"
6800        );
6801        h.shutdown(&runtime);
6802    }
6803
6804    #[test]
6805    fn inspect_cluster_endpoint_passes_full_content_query_param() {
6806        // Even with no matching cluster (→ 404), the request must
6807        // reach the handler — proves the `?full_content=true` query
6808        // string parses cleanly (Query<InspectClusterQuery>::default
6809        // path didn't choke). If we accidentally fail at the extractor
6810        // we'd get a 400/422, not the expected 404.
6811        let runtime = rt();
6812        let h = Harness::new(&runtime);
6813        let r = h.router.clone();
6814        let (status, _body) = runtime.block_on(call(
6815            r,
6816            "GET",
6817            "/memory/clusters/missing?full_content=true",
6818            None,
6819        ));
6820        assert_eq!(status, StatusCode::NOT_FOUND);
6821        h.shutdown(&runtime);
6822    }
6823
6824    #[test]
6825    fn contradictions_endpoint_returns_empty_array_on_empty_db() {
6826        let runtime = rt();
6827        let h = Harness::new(&runtime);
6828        let r = h.router.clone();
6829        let (status, body) = runtime.block_on(call(
6830            r,
6831            "GET",
6832            "/memory/contradictions",
6833            None,
6834        ));
6835        assert_eq!(status, StatusCode::OK);
6836        assert!(body.is_array());
6837        assert_eq!(body.as_array().unwrap().len(), 0);
6838        h.shutdown(&runtime);
6839    }
6840
6841    #[test]
6842    fn derived_endpoints_require_bearer_when_auth_enabled() {
6843        let runtime = rt();
6844        let h = Harness::new_with_auth(&runtime, Some("secret-token".to_string()));
6845        // Each of the three new endpoints should reject missing token.
6846        // Per the existing tests' shutdown-timing comment: don't hold a
6847        // long-lived router clone across multiple iterations — drop the
6848        // clone before each subsequent oneshot, and don't keep a `let r =
6849        // h.router.clone()` alive across h.shutdown(). Re-clone per
6850        // iteration; the per-call clone is consumed by oneshot.
6851        for path in [
6852            "/memory/themes",
6853            "/memory/facts_about?subject=Sam",
6854            "/memory/contradictions",
6855            "/memory/clusters/any-id",
6856        ] {
6857            let (status, _) = runtime.block_on(call(h.router.clone(), "GET", path, None));
6858            assert_eq!(
6859                status,
6860                StatusCode::UNAUTHORIZED,
6861                "{path} should 401 without token"
6862            );
6863        }
6864        h.shutdown(&runtime);
6865    }
6866
6867    // ---- Document endpoints (v0.7.0 P6) ----
6868    //
6869    // Wire-path coverage. The `Harness` here uses
6870    // `WriterActor::spawn_full` without an embedder — same shape as the
6871    // existing handler tests. Ingest/search would fail at the writer
6872    // boundary with "writer has no embedder", but every other path
6873    // (404s, malformed ids, route shape, bearer auth gating, OpenAPI
6874    // documentation) is exercisable. Real end-to-end ingest→search
6875    // round-trip lives in `mcp_smoke.rs` where a real subprocess runs
6876    // with a fully-wired writer.
6877
6878    #[test]
6879    fn list_documents_endpoint_returns_empty_array_on_empty_db() {
6880        let runtime = rt();
6881        let h = Harness::new(&runtime);
6882        let r = h.router.clone();
6883        let (status, body) = runtime.block_on(call(r, "GET", "/memory/documents", None));
6884        assert_eq!(status, StatusCode::OK);
6885        assert!(body.is_array(), "expected array, got {body}");
6886        assert_eq!(body.as_array().unwrap().len(), 0);
6887        h.shutdown(&runtime);
6888    }
6889
6890    #[test]
6891    fn list_documents_endpoint_parses_query_params() {
6892        let runtime = rt();
6893        let h = Harness::new(&runtime);
6894        let r = h.router.clone();
6895        let (status, body) = runtime.block_on(call(
6896            r,
6897            "GET",
6898            "/memory/documents?limit=5&offset=0&include_forgotten=true",
6899            None,
6900        ));
6901        assert_eq!(status, StatusCode::OK);
6902        assert!(body.is_array());
6903        h.shutdown(&runtime);
6904    }
6905
6906    #[test]
6907    fn ingest_document_endpoint_rejects_empty_path() {
6908        let runtime = rt();
6909        let h = Harness::new(&runtime);
6910        let r = h.router.clone();
6911        let (status, body) = runtime.block_on(call(
6912            r,
6913            "POST",
6914            "/memory/documents",
6915            Some(json!({ "path": "" })),
6916        ));
6917        assert_eq!(status, StatusCode::BAD_REQUEST);
6918        assert!(
6919            body.get("error")
6920                .and_then(|v| v.as_str())
6921                .is_some_and(|s| s.contains("path")),
6922            "expected error mentioning path, got {body}"
6923        );
6924        h.shutdown(&runtime);
6925    }
6926
6927    #[test]
6928    fn search_docs_endpoint_rejects_empty_query() {
6929        let runtime = rt();
6930        let h = Harness::new(&runtime);
6931        let r = h.router.clone();
6932        let (status, body) = runtime.block_on(call(
6933            r,
6934            "POST",
6935            "/memory/documents/search",
6936            Some(json!({ "query": "   " })),
6937        ));
6938        assert_eq!(status, StatusCode::BAD_REQUEST);
6939        assert!(
6940            body.get("error")
6941                .and_then(|v| v.as_str())
6942                .is_some_and(|s| s.contains("must not be empty")
6943                    || s.contains("doc_search")),
6944            "expected error mentioning empty query, got {body}"
6945        );
6946        h.shutdown(&runtime);
6947    }
6948
6949    #[test]
6950    fn inspect_document_endpoint_unknown_id_returns_404() {
6951        let runtime = rt();
6952        let h = Harness::new(&runtime);
6953        let r = h.router.clone();
6954        let (status, body) = runtime.block_on(call(
6955            r,
6956            "GET",
6957            "/memory/documents/00000000-0000-7000-8000-000000000000",
6958            None,
6959        ));
6960        assert_eq!(status, StatusCode::NOT_FOUND);
6961        assert!(body.get("error").is_some(), "got: {body}");
6962        h.shutdown(&runtime);
6963    }
6964
6965    #[test]
6966    fn inspect_document_endpoint_rejects_malformed_id() {
6967        let runtime = rt();
6968        let h = Harness::new(&runtime);
6969        let r = h.router.clone();
6970        let (status, _body) =
6971            runtime.block_on(call(r, "GET", "/memory/documents/not-a-uuid", None));
6972        assert_eq!(status, StatusCode::BAD_REQUEST);
6973        h.shutdown(&runtime);
6974    }
6975
6976    #[test]
6977    fn forget_document_endpoint_unknown_id_returns_404() {
6978        // Valid UUID format; no row exists → writer's `forget_document`
6979        // returns Error::NotFound → mapped to 404 by `ApiError::from`.
6980        let runtime = rt();
6981        let h = Harness::new(&runtime);
6982        let r = h.router.clone();
6983        let (status, _body) = runtime.block_on(call(
6984            r,
6985            "DELETE",
6986            "/memory/documents/00000000-0000-7000-8000-000000000000",
6987            None,
6988        ));
6989        assert_eq!(status, StatusCode::NOT_FOUND);
6990        h.shutdown(&runtime);
6991    }
6992
6993    #[test]
6994    fn forget_document_endpoint_rejects_malformed_id() {
6995        let runtime = rt();
6996        let h = Harness::new(&runtime);
6997        let r = h.router.clone();
6998        let (status, _body) =
6999            runtime.block_on(call(r, "DELETE", "/memory/documents/not-a-uuid", None));
7000        assert_eq!(status, StatusCode::BAD_REQUEST);
7001        h.shutdown(&runtime);
7002    }
7003
7004    #[test]
7005    fn document_endpoints_require_bearer_when_auth_enabled() {
7006        // All five doc endpoints sit behind the same authed Router and
7007        // must 401 without the bearer token. Mirrors
7008        // `derived_endpoints_require_bearer_when_auth_enabled`.
7009        let runtime = rt();
7010        let h = Harness::new_with_auth(&runtime, Some("doc-secret".to_string()));
7011        let cases: &[(&str, &str, Option<Value>)] = &[
7012            ("POST", "/memory/documents", Some(json!({ "path": "/x" }))),
7013            ("GET", "/memory/documents", None),
7014            (
7015                "POST",
7016                "/memory/documents/search",
7017                Some(json!({ "query": "x" })),
7018            ),
7019            (
7020                "GET",
7021                "/memory/documents/00000000-0000-7000-8000-000000000000",
7022                None,
7023            ),
7024            (
7025                "DELETE",
7026                "/memory/documents/00000000-0000-7000-8000-000000000000",
7027                None,
7028            ),
7029        ];
7030        for (method, path, body) in cases {
7031            let (status, _) =
7032                runtime.block_on(call(h.router.clone(), method, path, body.clone()));
7033            assert_eq!(
7034                status,
7035                StatusCode::UNAUTHORIZED,
7036                "{method} {path} should 401 without token"
7037            );
7038        }
7039        h.shutdown(&runtime);
7040    }
7041
7042    #[test]
7043    fn document_endpoints_accept_correct_bearer_token() {
7044        // Sanity check: with the right token, the same five endpoints
7045        // pass auth and reach the handler. We only assert that the
7046        // status code is NOT 401 — exact downstream behaviour depends
7047        // on the harness (no embedder → ingest/search would 500; empty
7048        // DB → list/inspect/forget return 200/404).
7049        let runtime = rt();
7050        let h = Harness::new_with_auth(&runtime, Some("doc-secret".to_string()));
7051        runtime.block_on(async {
7052            // GET /memory/documents → 200 + empty array (auth passes).
7053            let (status, _) = call_with_auth(
7054                h.router.clone(),
7055                "GET",
7056                "/memory/documents",
7057                None,
7058                Some("Bearer doc-secret"),
7059            )
7060            .await;
7061            assert_eq!(status, StatusCode::OK);
7062
7063            // GET /memory/documents/<unknown> → 404 (auth passes).
7064            let (status, _) = call_with_auth(
7065                h.router.clone(),
7066                "GET",
7067                "/memory/documents/00000000-0000-7000-8000-000000000000",
7068                None,
7069                Some("Bearer doc-secret"),
7070            )
7071            .await;
7072            assert_eq!(status, StatusCode::NOT_FOUND);
7073        });
7074        h.shutdown(&runtime);
7075    }
7076
7077    // ---------------------------------------------------------------------
7078    // v0.8.0 P2: tenant header extractor tests
7079    // ---------------------------------------------------------------------
7080
7081    /// `X-Solo-Tenant: default` resolves to the default tenant (which
7082    /// in the test harness is the only one wired in the registry).
7083    #[test]
7084    fn tenant_header_default_resolves() {
7085        let runtime = rt();
7086        let h = Harness::new(&runtime);
7087        let r = h.router.clone();
7088        let (status, _body) = runtime.block_on(async {
7089            let req = Request::builder()
7090                .method("GET")
7091                .uri("/memory/00000000-0000-7000-8000-000000000000")
7092                .header("x-solo-tenant", "default")
7093                .body(Body::empty())
7094                .unwrap();
7095            let resp = r.oneshot(req).await.expect("oneshot");
7096            let s = resp.status();
7097            let _b = resp.into_body().collect().await.unwrap().to_bytes();
7098            (s, _b)
7099        });
7100        // 404 because the id doesn't exist — but it's a routed 404 from
7101        // inspect_handler, not a 400 from a bad tenant header. That's
7102        // the proof point.
7103        assert_eq!(status, StatusCode::NOT_FOUND);
7104        h.shutdown(&runtime);
7105    }
7106
7107    /// `X-Solo-Tenant: UPPER` → 400 (invalid tenant id format).
7108    #[test]
7109    fn tenant_header_invalid_returns_400() {
7110        let runtime = rt();
7111        let h = Harness::new(&runtime);
7112        let r = h.router.clone();
7113        let (status, body) = runtime.block_on(async {
7114            let req = Request::builder()
7115                .method("GET")
7116                .uri("/memory/00000000-0000-7000-8000-000000000000")
7117                .header("x-solo-tenant", "UPPER")
7118                .body(Body::empty())
7119                .unwrap();
7120            let resp = r.oneshot(req).await.expect("oneshot");
7121            let s = resp.status();
7122            let bytes = resp.into_body().collect().await.unwrap().to_bytes();
7123            let v: Value = serde_json::from_slice(&bytes).unwrap_or(Value::Null);
7124            (s, v)
7125        });
7126        assert_eq!(status, StatusCode::BAD_REQUEST);
7127        let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("");
7128        assert!(
7129            msg.to_lowercase().contains("tenant") || msg.to_lowercase().contains("invalid"),
7130            "error must mention tenant/invalid: {msg}"
7131        );
7132        h.shutdown(&runtime);
7133    }
7134
7135    /// `X-Solo-Tenant: never-registered` → 404 (unknown tenant id).
7136    #[test]
7137    fn tenant_header_unknown_returns_404() {
7138        let runtime = rt();
7139        let h = Harness::new(&runtime);
7140        let r = h.router.clone();
7141        let (status, _body) = runtime.block_on(async {
7142            let req = Request::builder()
7143                .method("GET")
7144                .uri("/memory/00000000-0000-7000-8000-000000000000")
7145                .header("x-solo-tenant", "never-registered")
7146                .body(Body::empty())
7147                .unwrap();
7148            let resp = r.oneshot(req).await.expect("oneshot");
7149            let s = resp.status();
7150            let _b = resp.into_body().collect().await.unwrap().to_bytes();
7151            (s, _b)
7152        });
7153        assert_eq!(status, StatusCode::NOT_FOUND);
7154        h.shutdown(&runtime);
7155    }
7156
7157    /// No `X-Solo-Tenant` header → falls back to state.default_tenant.
7158    /// The reach-through to `inspect_handler` should produce the normal
7159    /// 404 for an unknown id rather than a tenant-routing error.
7160    #[test]
7161    fn tenant_header_missing_defaults_to_state_default_tenant() {
7162        let runtime = rt();
7163        let h = Harness::new(&runtime);
7164        let r = h.router.clone();
7165        let (status, _body) = runtime.block_on(async {
7166            let req = Request::builder()
7167                .method("GET")
7168                .uri("/memory/00000000-0000-7000-8000-000000000000")
7169                .body(Body::empty())
7170                .unwrap();
7171            let resp = r.oneshot(req).await.expect("oneshot");
7172            let s = resp.status();
7173            let _b = resp.into_body().collect().await.unwrap().to_bytes();
7174            (s, _b)
7175        });
7176        assert_eq!(status, StatusCode::NOT_FOUND);
7177        h.shutdown(&runtime);
7178    }
7179
7180    // ---------------------------------------------------------------------
7181    // v0.9.x: GET /v1/graph/expand
7182    //
7183    // Seeds tables directly via the Harness's side connection and walks
7184    // the four expansion kinds. The Harness is single-tenant (default);
7185    // the routing-isolation case is already covered by the
7186    // `tenant_header_*` tests above (an `X-Solo-Tenant: never-registered`
7187    // header against the same node_id surfaces 404 from the registry,
7188    // proving cross-tenant lookups can't bleed).
7189    // ---------------------------------------------------------------------
7190
7191    /// Insert one episode row directly. Returns its rowid for callers
7192    /// that need to wire `triples.source_episode_id`.
7193    fn seed_episode(
7194        conn: &rusqlite::Connection,
7195        memory_id: &str,
7196        ts_ms: i64,
7197        content: &str,
7198    ) -> i64 {
7199        conn.execute(
7200            "INSERT INTO episodes
7201                (memory_id, ts_ms, source_type, content,
7202                 encoding_context_json, tier, status,
7203                 confidence, strength, salience,
7204                 created_at_ms, updated_at_ms)
7205                VALUES (?1, ?2, 'user_message', ?3,
7206                        '{}', 'hot', 'active',
7207                        1.0, 0.5, 0.5, ?2, ?2)",
7208            rusqlite::params![memory_id, ts_ms, content],
7209        )
7210        .expect("seed episode");
7211        conn.last_insert_rowid()
7212    }
7213
7214    fn seed_cluster_row(conn: &rusqlite::Connection, cluster_id: &str, created_at_ms: i64) {
7215        conn.execute(
7216            "INSERT INTO clusters (cluster_id, coherence, created_at_ms)
7217                  VALUES (?1, 0.5, ?2)",
7218            rusqlite::params![cluster_id, created_at_ms],
7219        )
7220        .expect("seed cluster");
7221    }
7222
7223    fn seed_cluster_member(conn: &rusqlite::Connection, cluster_id: &str, memory_id: &str) {
7224        conn.execute(
7225            "INSERT INTO cluster_episodes (cluster_id, memory_id) VALUES (?1, ?2)",
7226            rusqlite::params![cluster_id, memory_id],
7227        )
7228        .expect("seed cluster_episodes");
7229    }
7230
7231    fn seed_document_row(conn: &rusqlite::Connection, doc_id: &str, title: &str) {
7232        conn.execute(
7233            "INSERT INTO documents
7234                (doc_id, source, title, mime_type, ingested_at_ms,
7235                 modified_at_ms, status, chunk_count, content_hash, byte_size)
7236                VALUES (?1, ?2, ?3, 'text/plain', 0, NULL,
7237                        'active', 0, ?1, NULL)",
7238            rusqlite::params![doc_id, format!("/tmp/{title}.txt"), title],
7239        )
7240        .expect("seed doc");
7241    }
7242
7243    fn seed_chunk_row(
7244        conn: &rusqlite::Connection,
7245        chunk_id: &str,
7246        doc_id: &str,
7247        chunk_index: i64,
7248        content: &str,
7249    ) {
7250        conn.execute(
7251            "INSERT INTO document_chunks
7252                (chunk_id, doc_id, chunk_index, content,
7253                 token_count, start_offset, end_offset, created_at_ms)
7254                VALUES (?1, ?2, ?3, ?4, 1, 0, ?5, 0)",
7255            rusqlite::params![chunk_id, doc_id, chunk_index, content, content.len() as i64],
7256        )
7257        .expect("seed chunk");
7258    }
7259
7260    fn seed_triple_row(
7261        conn: &rusqlite::Connection,
7262        triple_id: &str,
7263        subject: &str,
7264        predicate: &str,
7265        object: &str,
7266        source_episode_rowid: Option<i64>,
7267    ) {
7268        conn.execute(
7269            "INSERT INTO triples
7270                 (triple_id, subject_id, predicate, object_id, object_kind,
7271                  valid_from_ms, valid_to_ms, confidence, provenance_json,
7272                  status, created_at_ms, updated_at_ms, source_episode_id)
7273                 VALUES (?1, ?2, ?3, ?4, 'literal', 0, NULL, 0.9, '{}',
7274                         'active', 0, 0, ?5)",
7275            rusqlite::params![triple_id, subject, predicate, object, source_episode_rowid],
7276        )
7277        .expect("seed triple");
7278    }
7279
7280    /// Insert a `semantic_abstractions` row (cluster LLM summary). Used
7281    /// by the cluster-inspect test to verify the abstraction concat path.
7282    fn seed_abstraction_row(
7283        conn: &rusqlite::Connection,
7284        abstraction_id: &str,
7285        cluster_id: &str,
7286        content: &str,
7287    ) {
7288        conn.execute(
7289            "INSERT INTO semantic_abstractions
7290                 (abstraction_id, cluster_id, content, provenance_json,
7291                  confidence, created_at_ms)
7292                 VALUES (?1, ?2, ?3, '{}', 0.9, 0)",
7293            rusqlite::params![abstraction_id, cluster_id, content],
7294        )
7295        .expect("seed abstraction");
7296    }
7297
7298    /// Tests use simple ASCII node_ids (UUID-shaped + plain entity strings),
7299    /// so we percent-encode only `:` and a few other delimiters by hand.
7300    fn percent_encode_node_id(node_id: &str) -> String {
7301        let mut out = String::with_capacity(node_id.len());
7302        for c in node_id.chars() {
7303            match c {
7304                ':' => out.push_str("%3A"),
7305                ' ' => out.push_str("%20"),
7306                '&' => out.push_str("%26"),
7307                '+' => out.push_str("%2B"),
7308                '?' => out.push_str("%3F"),
7309                '#' => out.push_str("%23"),
7310                _ => out.push(c),
7311            }
7312        }
7313        out
7314    }
7315
7316    fn graph_uri(node_id: &str, kind: &str) -> String {
7317        let encoded = percent_encode_node_id(node_id);
7318        format!("/v1/graph/expand?node_id={encoded}&kind={kind}")
7319    }
7320
7321    fn graph_uri_with_limit(node_id: &str, kind: &str, limit: u32) -> String {
7322        let encoded = percent_encode_node_id(node_id);
7323        format!("/v1/graph/expand?node_id={encoded}&kind={kind}&limit={limit}")
7324    }
7325
7326    #[test]
7327    fn expand_cluster_member_from_episode_returns_clusters() {
7328        let runtime = rt();
7329        let h = Harness::new(&runtime);
7330        let memory_id = "11111111-1111-7000-8000-000000000001";
7331        {
7332            let conn = h.open_db();
7333            seed_episode(&conn, memory_id, 100, "ep content");
7334            seed_cluster_row(&conn, "cl-a", 200);
7335            seed_cluster_member(&conn, "cl-a", memory_id);
7336        }
7337        let node_id = format!("ep:{memory_id}");
7338        let (status, body) = runtime.block_on(call(
7339            h.router.clone(),
7340            "GET",
7341            &graph_uri(&node_id, "cluster_member"),
7342            None,
7343        ));
7344        assert_eq!(status, StatusCode::OK, "body: {body}");
7345        let nodes = body.get("nodes").and_then(|v| v.as_array()).expect("nodes array");
7346        let edges = body.get("edges").and_then(|v| v.as_array()).expect("edges array");
7347        assert_eq!(nodes.len(), 1, "{body}");
7348        assert_eq!(nodes[0]["id"], "cl:cl-a");
7349        assert_eq!(nodes[0]["kind"], "cluster");
7350        assert_eq!(edges.len(), 1);
7351        assert_eq!(edges[0]["source"], node_id);
7352        assert_eq!(edges[0]["target"], "cl:cl-a");
7353        assert_eq!(edges[0]["kind"], "cluster_member");
7354        h.shutdown(&runtime);
7355    }
7356
7357    #[test]
7358    fn expand_cluster_member_from_cluster_returns_episodes() {
7359        let runtime = rt();
7360        let h = Harness::new(&runtime);
7361        {
7362            let conn = h.open_db();
7363            seed_cluster_row(&conn, "cl-multi", 500);
7364            for i in 0..5 {
7365                let mid = format!("2222{i}222-2222-7000-8000-000000000001");
7366                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
7367                seed_cluster_member(&conn, "cl-multi", &mid);
7368            }
7369        }
7370        let (status, body) = runtime.block_on(call(
7371            h.router.clone(),
7372            "GET",
7373            &graph_uri_with_limit("cl:cl-multi", "cluster_member", 3),
7374            None,
7375        ));
7376        assert_eq!(status, StatusCode::OK, "body: {body}");
7377        let nodes = body["nodes"].as_array().unwrap();
7378        let edges = body["edges"].as_array().unwrap();
7379        assert_eq!(nodes.len(), 3, "limit honored: {body}");
7380        assert_eq!(edges.len(), 3);
7381        for n in nodes {
7382            assert_eq!(n["kind"], "episode");
7383        }
7384        h.shutdown(&runtime);
7385    }
7386
7387    #[test]
7388    fn expand_document_chunk_from_document_returns_chunks() {
7389        let runtime = rt();
7390        let h = Harness::new(&runtime);
7391        let doc_id = "33333333-3333-7000-8000-000000000001";
7392        {
7393            let conn = h.open_db();
7394            seed_document_row(&conn, doc_id, "doc A");
7395            // Insert chunks in shuffled order so the ORDER BY chunk_index
7396            // is load-bearing.
7397            seed_chunk_row(&conn, "c2", doc_id, 2, "chunk 2 text");
7398            seed_chunk_row(&conn, "c0", doc_id, 0, "chunk 0 text");
7399            seed_chunk_row(&conn, "c1", doc_id, 1, "chunk 1 text");
7400            seed_chunk_row(&conn, "c3", doc_id, 3, "chunk 3 text");
7401        }
7402        let node_id = format!("doc:{doc_id}");
7403        let (status, body) = runtime.block_on(call(
7404            h.router.clone(),
7405            "GET",
7406            &graph_uri(&node_id, "document_chunk"),
7407            None,
7408        ));
7409        assert_eq!(status, StatusCode::OK, "body: {body}");
7410        let nodes = body["nodes"].as_array().unwrap();
7411        let edges = body["edges"].as_array().unwrap();
7412        assert_eq!(nodes.len(), 4);
7413        assert_eq!(edges.len(), 4);
7414        // Verify in-order chunk_index emission.
7415        assert_eq!(nodes[0]["id"], "chunk:c0");
7416        assert_eq!(nodes[1]["id"], "chunk:c1");
7417        assert_eq!(nodes[2]["id"], "chunk:c2");
7418        assert_eq!(nodes[3]["id"], "chunk:c3");
7419        for e in edges {
7420            assert_eq!(e["kind"], "document_chunk");
7421        }
7422        h.shutdown(&runtime);
7423    }
7424
7425    #[test]
7426    fn expand_document_chunk_from_chunk_returns_parent_document() {
7427        let runtime = rt();
7428        let h = Harness::new(&runtime);
7429        let doc_id = "44444444-4444-7000-8000-000000000001";
7430        {
7431            let conn = h.open_db();
7432            seed_document_row(&conn, doc_id, "parent doc");
7433            seed_chunk_row(&conn, "c-orphan", doc_id, 0, "chunk content");
7434        }
7435        let (status, body) = runtime.block_on(call(
7436            h.router.clone(),
7437            "GET",
7438            &graph_uri("chunk:c-orphan", "document_chunk"),
7439            None,
7440        ));
7441        assert_eq!(status, StatusCode::OK, "body: {body}");
7442        let nodes = body["nodes"].as_array().unwrap();
7443        let edges = body["edges"].as_array().unwrap();
7444        assert_eq!(nodes.len(), 1);
7445        assert_eq!(edges.len(), 1);
7446        assert_eq!(nodes[0]["id"], format!("doc:{doc_id}"));
7447        assert_eq!(edges[0]["source"], "chunk:c-orphan");
7448        assert_eq!(edges[0]["target"], format!("doc:{doc_id}"));
7449        h.shutdown(&runtime);
7450    }
7451
7452    #[test]
7453    fn expand_triple_from_episode_returns_entities() {
7454        let runtime = rt();
7455        let h = Harness::new(&runtime);
7456        let memory_id = "55555555-5555-7000-8000-000000000001";
7457        let rowid;
7458        {
7459            let conn = h.open_db();
7460            rowid = seed_episode(&conn, memory_id, 100, "alice works at anthropic");
7461            // Two distinct triples → 4 entity endpoints (Alice, Anthropic, Bob, NYC).
7462            seed_triple_row(&conn, "t1", "Alice", "works_at", "Anthropic", Some(rowid));
7463            seed_triple_row(&conn, "t2", "Bob", "lives_in", "NYC", Some(rowid));
7464        }
7465        let node_id = format!("ep:{memory_id}");
7466        let (status, body) = runtime.block_on(call(
7467            h.router.clone(),
7468            "GET",
7469            &graph_uri(&node_id, "triple"),
7470            None,
7471        ));
7472        assert_eq!(status, StatusCode::OK, "body: {body}");
7473        let nodes = body["nodes"].as_array().unwrap();
7474        let edges = body["edges"].as_array().unwrap();
7475        assert_eq!(nodes.len(), 4, "expected 4 unique entity nodes: {body}");
7476        assert_eq!(edges.len(), 2);
7477        let ids: std::collections::HashSet<String> = nodes
7478            .iter()
7479            .map(|n| n["id"].as_str().unwrap().to_string())
7480            .collect();
7481        for expected in ["ent:Alice", "ent:Anthropic", "ent:Bob", "ent:NYC"] {
7482            assert!(ids.contains(expected), "missing {expected} in {body}");
7483        }
7484        for e in edges {
7485            assert_eq!(e["kind"], "triple");
7486            assert!(e["predicate"].is_string(), "predicate set: {body}");
7487        }
7488        h.shutdown(&runtime);
7489    }
7490
7491    #[test]
7492    fn expand_triple_from_entity_returns_episodes() {
7493        let runtime = rt();
7494        let h = Harness::new(&runtime);
7495        {
7496            let conn = h.open_db();
7497            let r1 = seed_episode(
7498                &conn,
7499                "66666666-6666-7000-8000-000000000001",
7500                100,
7501                "alice ep one",
7502            );
7503            let r2 = seed_episode(
7504                &conn,
7505                "66666666-6666-7000-8000-000000000002",
7506                200,
7507                "alice ep two",
7508            );
7509            let r3 = seed_episode(
7510                &conn,
7511                "66666666-6666-7000-8000-000000000003",
7512                300,
7513                "alice ep three",
7514            );
7515            // 3 triples all mentioning Alice on one side or another.
7516            seed_triple_row(&conn, "t1", "Alice", "p", "Bob", Some(r1));
7517            seed_triple_row(&conn, "t2", "Carol", "p", "Alice", Some(r2));
7518            seed_triple_row(&conn, "t3", "Alice", "q", "Dave", Some(r3));
7519            // One triple with no source — must be skipped by the IS NOT NULL filter.
7520            seed_triple_row(&conn, "t-orphan", "Alice", "p", "Eve", None);
7521        }
7522        let (status, body) = runtime.block_on(call(
7523            h.router.clone(),
7524            "GET",
7525            &graph_uri("ent:Alice", "triple"),
7526            None,
7527        ));
7528        assert_eq!(status, StatusCode::OK, "body: {body}");
7529        let nodes = body["nodes"].as_array().unwrap();
7530        let edges = body["edges"].as_array().unwrap();
7531        assert_eq!(nodes.len(), 3, "expected 3 episodes: {body}");
7532        assert_eq!(edges.len(), 3);
7533        for n in nodes {
7534            assert_eq!(n["kind"], "episode");
7535        }
7536        for e in edges {
7537            assert_eq!(e["source"], "ent:Alice");
7538            assert_eq!(e["kind"], "triple");
7539        }
7540        h.shutdown(&runtime);
7541    }
7542
7543    #[test]
7544    fn expand_semantic_from_episode_returns_similar() {
7545        let runtime = rt();
7546        let h = Harness::new(&runtime);
7547        // Seed three episodes via the writer-actor so they get embedded
7548        // + inserted into HNSW. StubEmbedder is deterministic: identical
7549        // content → identical vector → cos_distance = 0. So we use
7550        // distinct strings, then expand from one of them and assert at
7551        // least one similar peer comes back.
7552        runtime.block_on(async {
7553            let mid1 = post_remember(h.router.clone(), "alpha alpha alpha").await;
7554            let _mid2 = post_remember(h.router.clone(), "beta beta beta").await;
7555            let _mid3 = post_remember(h.router.clone(), "gamma gamma gamma").await;
7556            // Expand from mid1.
7557            let (status, body) = call(
7558                h.router.clone(),
7559                "GET",
7560                &graph_uri_with_limit(&format!("ep:{mid1}"), "semantic", 5),
7561                None,
7562            )
7563            .await;
7564            assert_eq!(status, StatusCode::OK, "body: {body}");
7565            let nodes = body["nodes"].as_array().unwrap();
7566            let edges = body["edges"].as_array().unwrap();
7567            // Must NOT include the source.
7568            for n in nodes {
7569                assert_ne!(
7570                    n["id"].as_str().unwrap(),
7571                    format!("ep:{mid1}"),
7572                    "self must be excluded: {body}"
7573                );
7574            }
7575            // Edges must be tagged semantic with a numeric weight.
7576            for e in edges {
7577                assert_eq!(e["kind"], "semantic");
7578                assert!(e["weight"].is_number(), "weight set: {body}");
7579            }
7580        });
7581        h.shutdown(&runtime);
7582    }
7583
7584    /// Helper: POST /memory and return the new memory_id.
7585    async fn post_remember(router: axum::Router, content: &str) -> String {
7586        let (status, body) = call(
7587            router,
7588            "POST",
7589            "/memory",
7590            Some(json!({ "content": content })),
7591        )
7592        .await;
7593        assert_eq!(status, StatusCode::OK, "post failed: {body}");
7594        body["memory_id"].as_str().unwrap().to_string()
7595    }
7596
7597    #[test]
7598    fn expand_400_on_invalid_kind() {
7599        let runtime = rt();
7600        let h = Harness::new(&runtime);
7601        let (status, _body) = runtime.block_on(call(
7602            h.router.clone(),
7603            "GET",
7604            "/v1/graph/expand?node_id=ep:any&kind=banana",
7605            None,
7606        ));
7607        // axum's Query extractor rejects unknown enum value with 400/422.
7608        assert!(
7609            status == StatusCode::BAD_REQUEST || status == StatusCode::UNPROCESSABLE_ENTITY,
7610            "expected 400/422 for bad kind, got {status}"
7611        );
7612        h.shutdown(&runtime);
7613    }
7614
7615    #[test]
7616    fn expand_400_on_invalid_node_for_kind() {
7617        let runtime = rt();
7618        let h = Harness::new(&runtime);
7619        // kind=semantic from a cluster source → 400.
7620        let (status, body) = runtime.block_on(call(
7621            h.router.clone(),
7622            "GET",
7623            &graph_uri("cl:doesnt-matter", "semantic"),
7624            None,
7625        ));
7626        assert_eq!(status, StatusCode::BAD_REQUEST);
7627        assert!(
7628            body["error"]
7629                .as_str()
7630                .is_some_and(|s| s.contains("semantic only valid for episode")),
7631            "got: {body}"
7632        );
7633        h.shutdown(&runtime);
7634    }
7635
7636    #[test]
7637    fn expand_404_on_missing_node_id() {
7638        let runtime = rt();
7639        let h = Harness::new(&runtime);
7640        let (status, body) = runtime.block_on(call(
7641            h.router.clone(),
7642            "GET",
7643            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
7644            None,
7645        ));
7646        assert_eq!(status, StatusCode::NOT_FOUND, "{body}");
7647        h.shutdown(&runtime);
7648    }
7649
7650    #[test]
7651    fn expand_limit_clamped_at_100() {
7652        let runtime = rt();
7653        let h = Harness::new(&runtime);
7654        // Seed > 100 cluster members so we can see the clamp in action.
7655        {
7656            let conn = h.open_db();
7657            seed_cluster_row(&conn, "cl-huge", 1_000);
7658            for i in 0..150 {
7659                let mid = format!("77777777-7777-7000-8000-{:012}", i);
7660                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
7661                seed_cluster_member(&conn, "cl-huge", &mid);
7662            }
7663        }
7664        let (status, body) = runtime.block_on(call(
7665            h.router.clone(),
7666            "GET",
7667            &graph_uri_with_limit("cl:cl-huge", "cluster_member", 999),
7668            None,
7669        ));
7670        assert_eq!(status, StatusCode::OK, "body: {body}");
7671        let nodes = body["nodes"].as_array().unwrap();
7672        assert_eq!(
7673            nodes.len(),
7674            100,
7675            "limit must be silently clamped to 100, got {}",
7676            nodes.len()
7677        );
7678        h.shutdown(&runtime);
7679    }
7680
7681    #[test]
7682    fn expand_bad_node_id_prefix_returns_400() {
7683        let runtime = rt();
7684        let h = Harness::new(&runtime);
7685        let (status, body) = runtime.block_on(call(
7686            h.router.clone(),
7687            "GET",
7688            "/v1/graph/expand?node_id=garbage&kind=cluster_member",
7689            None,
7690        ));
7691        assert_eq!(status, StatusCode::BAD_REQUEST);
7692        assert!(
7693            body["error"]
7694                .as_str()
7695                .is_some_and(|s| s.contains("node_id must be")),
7696            "got: {body}"
7697        );
7698        h.shutdown(&runtime);
7699    }
7700
7701    #[test]
7702    fn expand_respects_tenant_scoping_via_unknown_tenant_header() {
7703        // Routing via X-Solo-Tenant: a header pointing to an unknown
7704        // tenant must 404 before the handler even runs — the
7705        // TenantExtractor is the gatekeeper, so node ids can't be
7706        // resolved against the wrong tenant's DB.
7707        let runtime = rt();
7708        let h = Harness::new(&runtime);
7709        // Seed a real episode in the default tenant so we know it
7710        // exists there. If tenant scoping leaked, this lookup would 200
7711        // even with the wrong tenant header.
7712        let memory_id = "88888888-8888-7000-8000-000000000001";
7713        {
7714            let conn = h.open_db();
7715            seed_episode(&conn, memory_id, 100, "scoped");
7716            seed_cluster_row(&conn, "cl-scoped", 200);
7717            seed_cluster_member(&conn, "cl-scoped", memory_id);
7718        }
7719        let node_id = format!("ep:{memory_id}");
7720        let r = h.router.clone();
7721        let (status, _body) = runtime.block_on(async {
7722            let req = Request::builder()
7723                .method("GET")
7724                .uri(graph_uri(&node_id, "cluster_member"))
7725                .header("x-solo-tenant", "never-registered-tenant")
7726                .body(Body::empty())
7727                .unwrap();
7728            let resp = r.oneshot(req).await.expect("oneshot");
7729            let s = resp.status();
7730            let _b = resp.into_body().collect().await.unwrap().to_bytes();
7731            (s, _b)
7732        });
7733        // Unknown tenant id → 404 from the registry. Confirms cross-tenant
7734        // lookups can't smuggle through this endpoint.
7735        assert_eq!(status, StatusCode::NOT_FOUND);
7736        h.shutdown(&runtime);
7737    }
7738
7739    #[test]
7740    fn expand_respects_auth_when_enabled() {
7741        let runtime = rt();
7742        let h = Harness::new_with_auth(&runtime, Some("graph-secret".into()));
7743        // No Authorization header → 401.
7744        let (status, _) = runtime.block_on(call(
7745            h.router.clone(),
7746            "GET",
7747            &graph_uri("ep:any", "cluster_member"),
7748            None,
7749        ));
7750        assert_eq!(status, StatusCode::UNAUTHORIZED);
7751        // Right token → handler runs (404 for unknown node, NOT 401).
7752        let (status, _) = runtime.block_on(call_with_auth(
7753            h.router.clone(),
7754            "GET",
7755            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
7756            None,
7757            Some("Bearer graph-secret"),
7758        ));
7759        assert_eq!(status, StatusCode::NOT_FOUND);
7760        h.shutdown(&runtime);
7761    }
7762
7763    #[test]
7764    fn expand_works_when_auth_none() {
7765        let runtime = rt();
7766        let h = Harness::new(&runtime);
7767        // Unauthenticated request hits the handler; 404 for unknown node
7768        // proves the auth-none path doesn't reject the request.
7769        let (status, _) = runtime.block_on(call(
7770            h.router.clone(),
7771            "GET",
7772            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
7773            None,
7774        ));
7775        assert_eq!(status, StatusCode::NOT_FOUND);
7776        h.shutdown(&runtime);
7777    }
7778
7779    // ---------------------------------------------------------------------
7780    // v0.10.0: GET /v1/graph/nodes + GET /v1/graph/edges
7781    //
7782    // Paginated catalog reads. Both endpoints share auth + tenant +
7783    // cursor scaffolding from /v1/graph/expand, so tests focus on the
7784    // new surface: filter parsing, entity synthesis cap, cursor round-
7785    // trip, edge-type defaults (semantic excluded), and the semantic
7786    // 400 redirect to /v1/graph/neighbors.
7787    // ---------------------------------------------------------------------
7788
7789    /// Lower-level helper that captures response headers in addition to
7790    /// status + JSON body. Used by the entity-cap header test.
7791    async fn call_with_headers(
7792        router: axum::Router,
7793        method: &str,
7794        uri: &str,
7795    ) -> (StatusCode, axum::http::HeaderMap, Value) {
7796        let req = Request::builder()
7797            .method(method)
7798            .uri(uri)
7799            .header("content-length", "0")
7800            .body(Body::empty())
7801            .unwrap();
7802        let resp = router.oneshot(req).await.expect("oneshot");
7803        let status = resp.status();
7804        let headers = resp.headers().clone();
7805        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
7806        let v: Value = if body_bytes.is_empty() {
7807            Value::Null
7808        } else {
7809            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
7810        };
7811        (status, headers, v)
7812    }
7813
7814    #[test]
7815    fn nodes_returns_all_kinds_when_no_filter() {
7816        let runtime = rt();
7817        let h = Harness::new(&runtime);
7818        {
7819            let conn = h.open_db();
7820            let rowid = seed_episode(
7821                &conn,
7822                "aaaaaaaa-0000-7000-8000-000000000001",
7823                100,
7824                "episode one",
7825            );
7826            seed_document_row(&conn, "doc-1", "doc one");
7827            seed_chunk_row(&conn, "chunk-1", "doc-1", 0, "chunk one body");
7828            seed_cluster_row(&conn, "cl-one", 200);
7829            seed_triple_row(
7830                &conn,
7831                "t-one",
7832                "Alice",
7833                "knows",
7834                "Bob",
7835                Some(rowid),
7836            );
7837        }
7838        let (status, body) = runtime.block_on(call(
7839            h.router.clone(),
7840            "GET",
7841            "/v1/graph/nodes",
7842            None,
7843        ));
7844        assert_eq!(status, StatusCode::OK, "body: {body}");
7845        let nodes = body["nodes"].as_array().unwrap();
7846        let kinds: std::collections::HashSet<&str> = nodes
7847            .iter()
7848            .map(|n| n["kind"].as_str().unwrap())
7849            .collect();
7850        for expected in ["episode", "document", "chunk", "cluster", "entity"] {
7851            assert!(
7852                kinds.contains(expected),
7853                "expected {expected} kind in response: {body}"
7854            );
7855        }
7856        h.shutdown(&runtime);
7857    }
7858
7859    #[test]
7860    fn nodes_filter_by_single_kind() {
7861        let runtime = rt();
7862        let h = Harness::new(&runtime);
7863        {
7864            let conn = h.open_db();
7865            seed_episode(&conn, "bbbbbbbb-0000-7000-8000-000000000001", 100, "ep");
7866            seed_document_row(&conn, "doc-only", "d");
7867            seed_cluster_row(&conn, "cl-only", 300);
7868        }
7869        let (status, body) = runtime.block_on(call(
7870            h.router.clone(),
7871            "GET",
7872            "/v1/graph/nodes?kind=episode",
7873            None,
7874        ));
7875        assert_eq!(status, StatusCode::OK, "body: {body}");
7876        let nodes = body["nodes"].as_array().unwrap();
7877        assert!(!nodes.is_empty(), "{body}");
7878        for n in nodes {
7879            assert_eq!(n["kind"], "episode", "kind filter must be exclusive: {body}");
7880        }
7881        h.shutdown(&runtime);
7882    }
7883
7884    #[test]
7885    fn nodes_filter_by_multiple_kinds() {
7886        let runtime = rt();
7887        let h = Harness::new(&runtime);
7888        {
7889            let conn = h.open_db();
7890            seed_episode(&conn, "cccccccc-0000-7000-8000-000000000001", 100, "ep");
7891            seed_document_row(&conn, "doc-multi", "d");
7892            seed_cluster_row(&conn, "cl-multi", 300);
7893        }
7894        let (status, body) = runtime.block_on(call(
7895            h.router.clone(),
7896            "GET",
7897            "/v1/graph/nodes?kind=episode,document",
7898            None,
7899        ));
7900        assert_eq!(status, StatusCode::OK, "body: {body}");
7901        let nodes = body["nodes"].as_array().unwrap();
7902        let kinds: std::collections::HashSet<&str> = nodes
7903            .iter()
7904            .map(|n| n["kind"].as_str().unwrap())
7905            .collect();
7906        assert!(kinds.contains("episode"), "{body}");
7907        assert!(kinds.contains("document"), "{body}");
7908        assert!(
7909            !kinds.contains("cluster"),
7910            "cluster must be filtered out: {body}"
7911        );
7912        h.shutdown(&runtime);
7913    }
7914
7915    #[test]
7916    fn nodes_entity_synthesis_caps_at_200() {
7917        let runtime = rt();
7918        let h = Harness::new(&runtime);
7919        {
7920            let conn = h.open_db();
7921            // Seed one episode + 250 distinct triple object values so the
7922            // entity rollup surfaces >200 entities. ref_count is 1 for
7923            // each; pick subject = "Alice" for all so the entity count
7924            // collapses on subject (1 "Alice") + 250 distinct objects.
7925            let rowid = seed_episode(
7926                &conn,
7927                "dddddddd-0000-7000-8000-000000000001",
7928                100,
7929                "ep",
7930            );
7931            for i in 0..250 {
7932                let triple_id = format!("t-cap-{i:03}");
7933                let obj = format!("Entity{i:03}");
7934                seed_triple_row(&conn, &triple_id, "Alice", "knows", &obj, Some(rowid));
7935            }
7936        }
7937        let (status, headers, body) = runtime.block_on(call_with_headers(
7938            h.router.clone(),
7939            "GET",
7940            "/v1/graph/nodes?kind=entity&limit=500",
7941        ));
7942        assert_eq!(status, StatusCode::OK, "body: {body}");
7943        let nodes = body["nodes"].as_array().unwrap();
7944        assert_eq!(
7945            nodes.len(),
7946            200,
7947            "entity cap must be enforced at 200, got {}",
7948            nodes.len()
7949        );
7950        assert_eq!(
7951            headers
7952                .get("x-solo-entity-cap-reached")
7953                .and_then(|v| v.to_str().ok()),
7954            Some("true"),
7955            "cap-reached header missing: headers={headers:?}"
7956        );
7957        for n in nodes {
7958            assert_eq!(n["kind"], "entity");
7959        }
7960        h.shutdown(&runtime);
7961    }
7962
7963    #[test]
7964    fn nodes_since_until_filter_works() {
7965        let runtime = rt();
7966        let h = Harness::new(&runtime);
7967        {
7968            let conn = h.open_db();
7969            seed_episode(
7970                &conn,
7971                "eeeeeeee-0000-7000-8000-000000000001",
7972                100,
7973                "early",
7974            );
7975            seed_episode(
7976                &conn,
7977                "eeeeeeee-0000-7000-8000-000000000002",
7978                500,
7979                "middle",
7980            );
7981            seed_episode(
7982                &conn,
7983                "eeeeeeee-0000-7000-8000-000000000003",
7984                1000,
7985                "late",
7986            );
7987        }
7988        let (status, body) = runtime.block_on(call(
7989            h.router.clone(),
7990            "GET",
7991            "/v1/graph/nodes?kind=episode&since_ms=400&until_ms=600",
7992            None,
7993        ));
7994        assert_eq!(status, StatusCode::OK, "body: {body}");
7995        let nodes = body["nodes"].as_array().unwrap();
7996        assert_eq!(nodes.len(), 1, "{body}");
7997        assert_eq!(
7998            nodes[0]["id"],
7999            "ep:eeeeeeee-0000-7000-8000-000000000002"
8000        );
8001        h.shutdown(&runtime);
8002    }
8003
8004    #[test]
8005    fn nodes_pagination_round_trip() {
8006        let runtime = rt();
8007        let h = Harness::new(&runtime);
8008        {
8009            let conn = h.open_db();
8010            for i in 0..150 {
8011                let mid = format!("f0000000-0000-7000-8000-{i:012}");
8012                // ts_ms scales with i so the sort order is deterministic;
8013                // newest (highest i) appears first.
8014                seed_episode(&conn, &mid, 1_000 + i as i64, "page");
8015            }
8016        }
8017        let limit = 50u32;
8018        let mut seen: std::collections::HashSet<String> = Default::default();
8019        let mut next_cursor: Option<String> = None;
8020        for page_idx in 0..4 {
8021            let cursor_param = next_cursor
8022                .as_deref()
8023                .map(|c| format!("&cursor={c}"))
8024                .unwrap_or_default();
8025            let uri = format!(
8026                "/v1/graph/nodes?kind=episode&limit={limit}{cursor_param}"
8027            );
8028            let (status, body) =
8029                runtime.block_on(call(h.router.clone(), "GET", &uri, None));
8030            assert_eq!(status, StatusCode::OK, "page {page_idx}: {body}");
8031            let nodes = body["nodes"].as_array().unwrap();
8032            assert!(
8033                nodes.len() <= limit as usize,
8034                "page {page_idx} over-fetched: {body}"
8035            );
8036            for n in nodes {
8037                let id = n["id"].as_str().unwrap().to_string();
8038                assert!(seen.insert(id.clone()), "duplicate id across pages: {id}");
8039            }
8040            next_cursor = body
8041                .get("next_cursor")
8042                .and_then(|v| v.as_str())
8043                .map(|s| s.to_string());
8044            if next_cursor.is_none() {
8045                break;
8046            }
8047        }
8048        assert_eq!(
8049            seen.len(),
8050            150,
8051            "expected 150 distinct ids across pages, got {}",
8052            seen.len()
8053        );
8054        assert!(
8055            next_cursor.is_none(),
8056            "cursor should be null after last page; got {next_cursor:?}"
8057        );
8058        h.shutdown(&runtime);
8059    }
8060
8061    #[test]
8062    fn nodes_respects_tenant_scoping() {
8063        let runtime = rt();
8064        let h = Harness::new(&runtime);
8065        {
8066            let conn = h.open_db();
8067            seed_episode(
8068                &conn,
8069                "11110000-0000-7000-8000-000000000001",
8070                100,
8071                "tenant scope",
8072            );
8073        }
8074        // Request against a never-registered tenant header → 404 from
8075        // the tenant extractor before the handler runs.
8076        let r = h.router.clone();
8077        let (status, _body) = runtime.block_on(async {
8078            let req = Request::builder()
8079                .method("GET")
8080                .uri("/v1/graph/nodes")
8081                .header("x-solo-tenant", "never-registered-tenant")
8082                .body(Body::empty())
8083                .unwrap();
8084            let resp = r.oneshot(req).await.expect("oneshot");
8085            let s = resp.status();
8086            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8087            (s, _b)
8088        });
8089        assert_eq!(status, StatusCode::NOT_FOUND);
8090        h.shutdown(&runtime);
8091    }
8092
8093    #[test]
8094    fn nodes_respects_auth_when_enabled() {
8095        let runtime = rt();
8096        let h = Harness::new_with_auth(&runtime, Some("nodes-secret".into()));
8097        let (status, _) = runtime.block_on(call(
8098            h.router.clone(),
8099            "GET",
8100            "/v1/graph/nodes",
8101            None,
8102        ));
8103        assert_eq!(
8104            status,
8105            StatusCode::UNAUTHORIZED,
8106            "must reject unauthenticated request"
8107        );
8108        let (status, _) = runtime.block_on(call_with_auth(
8109            h.router.clone(),
8110            "GET",
8111            "/v1/graph/nodes",
8112            None,
8113            Some("Bearer nodes-secret"),
8114        ));
8115        assert_eq!(status, StatusCode::OK, "must pass through with bearer");
8116        h.shutdown(&runtime);
8117    }
8118
8119    #[test]
8120    fn nodes_works_with_auth_none() {
8121        let runtime = rt();
8122        let h = Harness::new(&runtime);
8123        let (status, body) = runtime.block_on(call(
8124            h.router.clone(),
8125            "GET",
8126            "/v1/graph/nodes",
8127            None,
8128        ));
8129        assert_eq!(status, StatusCode::OK, "{body}");
8130        assert!(body.get("nodes").is_some());
8131        h.shutdown(&runtime);
8132    }
8133
8134    // --- /v1/graph/edges ---
8135
8136    #[test]
8137    fn edges_returns_all_default_kinds() {
8138        let runtime = rt();
8139        let h = Harness::new(&runtime);
8140        {
8141            let conn = h.open_db();
8142            let rowid = seed_episode(
8143                &conn,
8144                "22220000-0000-7000-8000-000000000001",
8145                100,
8146                "ep src",
8147            );
8148            seed_triple_row(&conn, "t-def", "Alice", "knows", "Bob", Some(rowid));
8149            seed_document_row(&conn, "doc-e", "doc");
8150            seed_chunk_row(&conn, "c-e", "doc-e", 0, "chunk");
8151            seed_cluster_row(&conn, "cl-e", 200);
8152            seed_cluster_member(
8153                &conn,
8154                "cl-e",
8155                "22220000-0000-7000-8000-000000000001",
8156            );
8157        }
8158        let (status, body) = runtime.block_on(call(
8159            h.router.clone(),
8160            "GET",
8161            "/v1/graph/edges",
8162            None,
8163        ));
8164        assert_eq!(status, StatusCode::OK, "body: {body}");
8165        let edges = body["edges"].as_array().unwrap();
8166        let kinds: std::collections::HashSet<&str> = edges
8167            .iter()
8168            .map(|e| e["kind"].as_str().unwrap())
8169            .collect();
8170        assert!(kinds.contains("triple"), "{body}");
8171        assert!(kinds.contains("document_chunk"), "{body}");
8172        assert!(kinds.contains("cluster_member"), "{body}");
8173        assert!(
8174            !kinds.contains("semantic"),
8175            "semantic is NOT in default response: {body}"
8176        );
8177        h.shutdown(&runtime);
8178    }
8179
8180    #[test]
8181    fn edges_filter_by_node_id_finds_incident_edges() {
8182        let runtime = rt();
8183        let h = Harness::new(&runtime);
8184        let memory_id = "33330000-0000-7000-8000-000000000001";
8185        {
8186            let conn = h.open_db();
8187            let rowid = seed_episode(&conn, memory_id, 100, "ep multi-triple");
8188            seed_triple_row(&conn, "t-a", "Alice", "p", "Bob", Some(rowid));
8189            seed_triple_row(&conn, "t-b", "Alice", "p", "Carol", Some(rowid));
8190            seed_triple_row(&conn, "t-c", "Alice", "p", "Dave", Some(rowid));
8191            // Decoy episode with its own triple — must NOT come back.
8192            let decoy_rowid = seed_episode(
8193                &conn,
8194                "33330000-0000-7000-8000-000000000999",
8195                200,
8196                "decoy",
8197            );
8198            seed_triple_row(
8199                &conn,
8200                "t-decoy",
8201                "Alice",
8202                "p",
8203                "Eve",
8204                Some(decoy_rowid),
8205            );
8206        }
8207        let uri = format!(
8208            "/v1/graph/edges?type=triple&node_id={}",
8209            percent_encode_node_id(&format!("ep:{memory_id}"))
8210        );
8211        let (status, body) =
8212            runtime.block_on(call(h.router.clone(), "GET", &uri, None));
8213        assert_eq!(status, StatusCode::OK, "body: {body}");
8214        let edges = body["edges"].as_array().unwrap();
8215        assert_eq!(edges.len(), 3, "expected 3 incident edges: {body}");
8216        for e in edges {
8217            assert_eq!(e["source"], format!("ep:{memory_id}"));
8218            assert_eq!(e["kind"], "triple");
8219        }
8220        h.shutdown(&runtime);
8221    }
8222
8223    #[test]
8224    fn edges_filter_by_type_works() {
8225        let runtime = rt();
8226        let h = Harness::new(&runtime);
8227        {
8228            let conn = h.open_db();
8229            let rowid = seed_episode(
8230                &conn,
8231                "44440000-0000-7000-8000-000000000001",
8232                100,
8233                "ep",
8234            );
8235            seed_triple_row(&conn, "t-only", "Alice", "p", "Bob", Some(rowid));
8236            seed_document_row(&conn, "doc-skip", "doc");
8237            seed_chunk_row(&conn, "c-skip", "doc-skip", 0, "chunk");
8238        }
8239        let (status, body) = runtime.block_on(call(
8240            h.router.clone(),
8241            "GET",
8242            "/v1/graph/edges?type=triple",
8243            None,
8244        ));
8245        assert_eq!(status, StatusCode::OK, "{body}");
8246        let edges = body["edges"].as_array().unwrap();
8247        assert!(!edges.is_empty(), "{body}");
8248        for e in edges {
8249            assert_eq!(e["kind"], "triple", "{body}");
8250        }
8251        h.shutdown(&runtime);
8252    }
8253
8254    #[test]
8255    fn edges_rejects_semantic_type_with_400() {
8256        let runtime = rt();
8257        let h = Harness::new(&runtime);
8258        let (status, body) = runtime.block_on(call(
8259            h.router.clone(),
8260            "GET",
8261            "/v1/graph/edges?type=semantic",
8262            None,
8263        ));
8264        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
8265        let err = body["error"].as_str().unwrap_or_default();
8266        assert!(
8267            err.contains("/v1/graph/neighbors"),
8268            "error must point to /v1/graph/neighbors: {body}"
8269        );
8270        h.shutdown(&runtime);
8271    }
8272
8273    #[test]
8274    fn edges_pagination_round_trip() {
8275        let runtime = rt();
8276        let h = Harness::new(&runtime);
8277        {
8278            let conn = h.open_db();
8279            let rowid = seed_episode(
8280                &conn,
8281                "55550000-0000-7000-8000-000000000001",
8282                100,
8283                "ep big",
8284            );
8285            // 60 triples → 60 triple edges. limit=25 → 3 pages.
8286            for i in 0..60 {
8287                let tid = format!("t-page-{i:03}");
8288                let obj = format!("Obj{i:03}");
8289                seed_triple_row(&conn, &tid, "Alice", "p", &obj, Some(rowid));
8290            }
8291        }
8292        let limit = 25u32;
8293        let mut seen: std::collections::HashSet<String> = Default::default();
8294        let mut next_cursor: Option<String> = None;
8295        for page_idx in 0..5 {
8296            let cursor_param = next_cursor
8297                .as_deref()
8298                .map(|c| format!("&cursor={c}"))
8299                .unwrap_or_default();
8300            let uri = format!(
8301                "/v1/graph/edges?type=triple&limit={limit}{cursor_param}"
8302            );
8303            let (status, body) =
8304                runtime.block_on(call(h.router.clone(), "GET", &uri, None));
8305            assert_eq!(status, StatusCode::OK, "page {page_idx}: {body}");
8306            let edges = body["edges"].as_array().unwrap();
8307            for e in edges {
8308                let id = e["id"].as_str().unwrap().to_string();
8309                assert!(seen.insert(id.clone()), "duplicate edge id: {id}");
8310            }
8311            next_cursor = body
8312                .get("next_cursor")
8313                .and_then(|v| v.as_str())
8314                .map(|s| s.to_string());
8315            if next_cursor.is_none() {
8316                break;
8317            }
8318        }
8319        assert_eq!(
8320            seen.len(),
8321            60,
8322            "expected 60 distinct edges, got {}",
8323            seen.len()
8324        );
8325        assert!(next_cursor.is_none(), "expected exhausted cursor");
8326        h.shutdown(&runtime);
8327    }
8328
8329    #[test]
8330    fn edges_respects_tenant_scoping() {
8331        let runtime = rt();
8332        let h = Harness::new(&runtime);
8333        {
8334            let conn = h.open_db();
8335            let rowid = seed_episode(
8336                &conn,
8337                "66660000-0000-7000-8000-000000000001",
8338                100,
8339                "ep",
8340            );
8341            seed_triple_row(&conn, "t-tenant", "Alice", "p", "Bob", Some(rowid));
8342        }
8343        let r = h.router.clone();
8344        let (status, _) = runtime.block_on(async {
8345            let req = Request::builder()
8346                .method("GET")
8347                .uri("/v1/graph/edges")
8348                .header("x-solo-tenant", "never-registered-tenant")
8349                .body(Body::empty())
8350                .unwrap();
8351            let resp = r.oneshot(req).await.expect("oneshot");
8352            let s = resp.status();
8353            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8354            (s, _b)
8355        });
8356        assert_eq!(status, StatusCode::NOT_FOUND);
8357        h.shutdown(&runtime);
8358    }
8359
8360    #[test]
8361    fn edges_respects_auth_when_enabled() {
8362        let runtime = rt();
8363        let h = Harness::new_with_auth(&runtime, Some("edges-secret".into()));
8364        let (status, _) = runtime.block_on(call(
8365            h.router.clone(),
8366            "GET",
8367            "/v1/graph/edges",
8368            None,
8369        ));
8370        assert_eq!(status, StatusCode::UNAUTHORIZED);
8371        let (status, _) = runtime.block_on(call_with_auth(
8372            h.router.clone(),
8373            "GET",
8374            "/v1/graph/edges",
8375            None,
8376            Some("Bearer edges-secret"),
8377        ));
8378        assert_eq!(status, StatusCode::OK);
8379        h.shutdown(&runtime);
8380    }
8381
8382    // ---------------------------------------------------------------------
8383    // v0.10.0: GET /v1/graph/inspect/{id}
8384    //
8385    // Kind-discriminated full-record drill. Shares auth + tenant + node-id
8386    // prefix scaffolding with /v1/graph/expand and /v1/graph/{nodes,edges},
8387    // so tests focus on the new surface: per-kind full_text source +
8388    // triples_in/out shape + entity zero-triple 404 semantics + the
8389    // standard 400/404/auth/tenant cases.
8390    // ---------------------------------------------------------------------
8391
8392    fn inspect_uri(node_id: &str) -> String {
8393        // Path parameter must be percent-encoded (`:` is `%3A` after
8394        // the URI parser splits segments). axum's Path<String>
8395        // extractor percent-decodes automatically.
8396        format!("/v1/graph/inspect/{}", percent_encode_node_id(node_id))
8397    }
8398
8399    #[test]
8400    fn inspect_episode_returns_full_text_plus_triples_out() {
8401        let runtime = rt();
8402        let h = Harness::new(&runtime);
8403        let memory_id = "a1110000-0000-7000-8000-000000000001";
8404        let full_text = "Met Alice for coffee at the new place. She mentioned the project is on track but they're hitting issues with the deploy pipeline.";
8405        {
8406            let conn = h.open_db();
8407            let rowid = seed_episode(&conn, memory_id, 1_715_625_600_000, full_text);
8408            seed_triple_row(&conn, "t-ep-1", "user", "met_with", "Alice", Some(rowid));
8409            seed_triple_row(&conn, "t-ep-2", "user", "discussed", "deploy_pipeline", Some(rowid));
8410            seed_triple_row(&conn, "t-ep-3", "Alice", "works_on", "project", Some(rowid));
8411        }
8412        let (status, body) = runtime.block_on(call(
8413            h.router.clone(),
8414            "GET",
8415            &inspect_uri(&format!("ep:{memory_id}")),
8416            None,
8417        ));
8418        assert_eq!(status, StatusCode::OK, "body: {body}");
8419        assert_eq!(body["node"]["kind"], "episode");
8420        assert_eq!(body["node"]["id"], format!("ep:{memory_id}"));
8421        assert_eq!(
8422            body["full_text"].as_str().unwrap(),
8423            full_text,
8424            "full_text must match episodes.content verbatim, untruncated"
8425        );
8426        let triples_out = body["triples_out"].as_array().unwrap();
8427        assert_eq!(triples_out.len(), 3, "{body}");
8428        let triples_in = body["triples_in"].as_array().unwrap();
8429        assert!(triples_in.is_empty(), "episodes have no triples_in: {body}");
8430        for e in triples_out {
8431            assert_eq!(e["kind"], "triple");
8432            assert_eq!(e["source"], format!("ep:{memory_id}"));
8433            assert!(e["target"].as_str().unwrap().starts_with("ent:"));
8434            assert!(e["predicate"].as_str().is_some());
8435            assert!(e["weight"].as_f64().is_some());
8436        }
8437        h.shutdown(&runtime);
8438    }
8439
8440    #[test]
8441    fn inspect_episode_triples_in_is_empty_for_v10p1() {
8442        // Seed an episode + a triple from a DIFFERENT episode that
8443        // happens to mention the focal episode's content. Even with
8444        // entities referencing the episode topic, episode.triples_in
8445        // is structurally empty in v0.10.0 P1.
8446        let runtime = rt();
8447        let h = Harness::new(&runtime);
8448        let focal = "a2220000-0000-7000-8000-000000000001";
8449        let other = "a2220000-0000-7000-8000-000000000002";
8450        {
8451            let conn = h.open_db();
8452            seed_episode(&conn, focal, 100, "focal episode body");
8453            let other_rowid = seed_episode(&conn, other, 200, "another episode");
8454            // Entity "user" gets referenced heavily; doesn't matter --
8455            // episode triples_in stays empty.
8456            for i in 0..5 {
8457                let tid = format!("t-other-{i}");
8458                seed_triple_row(&conn, &tid, "user", "did", "thing", Some(other_rowid));
8459            }
8460        }
8461        let (status, body) = runtime.block_on(call(
8462            h.router.clone(),
8463            "GET",
8464            &inspect_uri(&format!("ep:{focal}")),
8465            None,
8466        ));
8467        assert_eq!(status, StatusCode::OK, "body: {body}");
8468        let triples_in = body["triples_in"].as_array().unwrap();
8469        assert!(
8470            triples_in.is_empty(),
8471            "episode triples_in must be empty regardless of cross-episode entity references: {body}"
8472        );
8473        h.shutdown(&runtime);
8474    }
8475
8476    #[test]
8477    fn inspect_document_returns_full_text_concatenated_from_chunks() {
8478        let runtime = rt();
8479        let h = Harness::new(&runtime);
8480        let doc_id = "d3330000-0000-7000-8000-000000000001";
8481        {
8482            let conn = h.open_db();
8483            seed_document_row(&conn, doc_id, "doc-title");
8484            seed_chunk_row(&conn, "ch-doc-1", doc_id, 0, "First chunk body.");
8485            seed_chunk_row(&conn, "ch-doc-2", doc_id, 1, "Second chunk body.");
8486            seed_chunk_row(&conn, "ch-doc-3", doc_id, 2, "Third chunk body.");
8487        }
8488        let (status, body) = runtime.block_on(call(
8489            h.router.clone(),
8490            "GET",
8491            &inspect_uri(&format!("doc:{doc_id}")),
8492            None,
8493        ));
8494        assert_eq!(status, StatusCode::OK, "body: {body}");
8495        assert_eq!(body["node"]["kind"], "document");
8496        let full_text = body["full_text"].as_str().unwrap();
8497        // Concatenation order matches chunk_index ASC; separator is "\n\n".
8498        assert_eq!(
8499            full_text,
8500            "First chunk body.\n\nSecond chunk body.\n\nThird chunk body."
8501        );
8502        assert!(body["triples_in"].as_array().unwrap().is_empty());
8503        assert!(body["triples_out"].as_array().unwrap().is_empty());
8504        h.shutdown(&runtime);
8505    }
8506
8507    #[test]
8508    fn inspect_chunk_returns_text() {
8509        let runtime = rt();
8510        let h = Harness::new(&runtime);
8511        let chunk_body = "This is the body of the chunk being inspected.";
8512        {
8513            let conn = h.open_db();
8514            seed_document_row(&conn, "doc-chunk-host", "host");
8515            seed_chunk_row(&conn, "chunk-inspect-target", "doc-chunk-host", 0, chunk_body);
8516        }
8517        let (status, body) = runtime.block_on(call(
8518            h.router.clone(),
8519            "GET",
8520            &inspect_uri("chunk:chunk-inspect-target"),
8521            None,
8522        ));
8523        assert_eq!(status, StatusCode::OK, "body: {body}");
8524        assert_eq!(body["node"]["kind"], "chunk");
8525        assert_eq!(body["full_text"].as_str().unwrap(), chunk_body);
8526        assert!(body["triples_in"].as_array().unwrap().is_empty());
8527        assert!(body["triples_out"].as_array().unwrap().is_empty());
8528        h.shutdown(&runtime);
8529    }
8530
8531    #[test]
8532    fn inspect_cluster_returns_label_and_abstraction() {
8533        let runtime = rt();
8534        let h = Harness::new(&runtime);
8535        let cluster_id = "cl-inspect-target";
8536        let abstraction_text = "Discussions about the deploy pipeline and on-call rotation.";
8537        {
8538            let conn = h.open_db();
8539            seed_cluster_row(&conn, cluster_id, 12345);
8540            seed_abstraction_row(&conn, "abs-1", cluster_id, abstraction_text);
8541        }
8542        let (status, body) = runtime.block_on(call(
8543            h.router.clone(),
8544            "GET",
8545            &inspect_uri(&format!("cl:{cluster_id}")),
8546            None,
8547        ));
8548        assert_eq!(status, StatusCode::OK, "body: {body}");
8549        assert_eq!(body["node"]["kind"], "cluster");
8550        let full_text = body["full_text"].as_str().unwrap();
8551        assert!(
8552            full_text.contains(cluster_id),
8553            "full_text must include cluster label: {full_text}"
8554        );
8555        assert!(
8556            full_text.contains(abstraction_text),
8557            "full_text must include abstraction text: {full_text}"
8558        );
8559        // "label\n\nabstraction" -- separated by blank line for the
8560        // inspector renderer.
8561        assert!(full_text.contains("\n\n"), "label and abstraction must be separated: {full_text}");
8562        h.shutdown(&runtime);
8563    }
8564
8565    #[test]
8566    fn inspect_entity_returns_triples_only() {
8567        let runtime = rt();
8568        let h = Harness::new(&runtime);
8569        {
8570            let conn = h.open_db();
8571            let rowid = seed_episode(
8572                &conn,
8573                "e5550000-0000-7000-8000-000000000001",
8574                100,
8575                "host episode",
8576            );
8577            // 5 triples that reference Alice (as subject or object).
8578            seed_triple_row(&conn, "t-ent-1", "Alice", "knows", "Bob", Some(rowid));
8579            seed_triple_row(&conn, "t-ent-2", "Alice", "works_at", "Anthropic", Some(rowid));
8580            seed_triple_row(&conn, "t-ent-3", "user", "met", "Alice", Some(rowid));
8581            seed_triple_row(&conn, "t-ent-4", "Alice", "owns", "laptop", Some(rowid));
8582            seed_triple_row(&conn, "t-ent-5", "Carol", "mentors", "Alice", Some(rowid));
8583        }
8584        let (status, body) = runtime.block_on(call(
8585            h.router.clone(),
8586            "GET",
8587            &inspect_uri("ent:Alice"),
8588            None,
8589        ));
8590        assert_eq!(status, StatusCode::OK, "body: {body}");
8591        assert_eq!(body["node"]["kind"], "entity");
8592        assert_eq!(body["node"]["id"], "ent:Alice");
8593        assert!(
8594            body["full_text"].is_null(),
8595            "entity full_text must be null (entities have no body): {body}"
8596        );
8597        let triples_out = body["triples_out"].as_array().unwrap();
8598        assert_eq!(triples_out.len(), 5, "{body}");
8599        assert!(body["triples_in"].as_array().unwrap().is_empty());
8600        for e in triples_out {
8601            assert_eq!(e["kind"], "triple");
8602            assert_eq!(e["source"], "ent:Alice");
8603            // Counterpart is always an entity; Alice never appears on
8604            // both ends so target != source.
8605            assert!(e["target"].as_str().unwrap().starts_with("ent:"));
8606            assert_ne!(e["target"], "ent:Alice");
8607        }
8608        h.shutdown(&runtime);
8609    }
8610
8611    #[test]
8612    fn inspect_entity_with_zero_triples_returns_404() {
8613        let runtime = rt();
8614        let h = Harness::new(&runtime);
8615        // Seed unrelated triples so the table isn't empty; the target
8616        // entity still has zero references.
8617        {
8618            let conn = h.open_db();
8619            let rowid = seed_episode(
8620                &conn,
8621                "e6660000-0000-7000-8000-000000000001",
8622                100,
8623                "ep",
8624            );
8625            seed_triple_row(&conn, "t-other", "Bob", "knows", "Carol", Some(rowid));
8626        }
8627        let (status, body) = runtime.block_on(call(
8628            h.router.clone(),
8629            "GET",
8630            &inspect_uri("ent:Nonexistent"),
8631            None,
8632        ));
8633        assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
8634        let err = body["error"].as_str().unwrap_or_default();
8635        assert!(
8636            err.contains("Nonexistent") || err.contains("entity"),
8637            "error must mention entity: {body}"
8638        );
8639        h.shutdown(&runtime);
8640    }
8641
8642    #[test]
8643    fn inspect_404_on_missing_node() {
8644        // Well-formed `ep:` prefix + valid UUID shape, but no row in DB.
8645        let runtime = rt();
8646        let h = Harness::new(&runtime);
8647        let (status, body) = runtime.block_on(call(
8648            h.router.clone(),
8649            "GET",
8650            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
8651            None,
8652        ));
8653        assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
8654        h.shutdown(&runtime);
8655    }
8656
8657    #[test]
8658    fn inspect_400_on_invalid_prefix() {
8659        let runtime = rt();
8660        let h = Harness::new(&runtime);
8661        let (status, body) = runtime.block_on(call(
8662            h.router.clone(),
8663            "GET",
8664            &inspect_uri("xyz:foo"),
8665            None,
8666        ));
8667        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
8668        let err = body["error"].as_str().unwrap_or_default();
8669        assert!(
8670            err.contains("xyz") || err.contains("prefix"),
8671            "error must mention bad prefix: {body}"
8672        );
8673        h.shutdown(&runtime);
8674    }
8675
8676    #[test]
8677    fn inspect_respects_tenant_scoping() {
8678        let runtime = rt();
8679        let h = Harness::new(&runtime);
8680        let memory_id = "a7770000-0000-7000-8000-000000000001";
8681        {
8682            let conn = h.open_db();
8683            seed_episode(&conn, memory_id, 100, "tenant scope");
8684        }
8685        // Real id in default tenant resolves; the same request against
8686        // a never-registered tenant header surfaces 404 from the tenant
8687        // extractor before the handler runs.
8688        let r = h.router.clone();
8689        let (status, _) = runtime.block_on(async {
8690            let req = Request::builder()
8691                .method("GET")
8692                .uri(inspect_uri(&format!("ep:{memory_id}")))
8693                .header("x-solo-tenant", "never-registered-tenant")
8694                .body(Body::empty())
8695                .unwrap();
8696            let resp = r.oneshot(req).await.expect("oneshot");
8697            let s = resp.status();
8698            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8699            (s, _b)
8700        });
8701        assert_eq!(status, StatusCode::NOT_FOUND);
8702        // Sanity: same id resolves on the default tenant.
8703        let (status, body) = runtime.block_on(call(
8704            h.router.clone(),
8705            "GET",
8706            &inspect_uri(&format!("ep:{memory_id}")),
8707            None,
8708        ));
8709        assert_eq!(status, StatusCode::OK, "default tenant must resolve: {body}");
8710        h.shutdown(&runtime);
8711    }
8712
8713    #[test]
8714    fn inspect_respects_auth_when_enabled() {
8715        let runtime = rt();
8716        let h = Harness::new_with_auth(&runtime, Some("inspect-secret".into()));
8717        // Missing bearer -> 401 before handler runs.
8718        let (status, _) = runtime.block_on(call(
8719            h.router.clone(),
8720            "GET",
8721            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
8722            None,
8723        ));
8724        assert_eq!(status, StatusCode::UNAUTHORIZED);
8725        // Valid bearer + unknown node -> handler runs and returns 404,
8726        // proving auth passed through.
8727        let (status, _) = runtime.block_on(call_with_auth(
8728            h.router.clone(),
8729            "GET",
8730            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
8731            None,
8732            Some("Bearer inspect-secret"),
8733        ));
8734        assert_eq!(status, StatusCode::NOT_FOUND);
8735        h.shutdown(&runtime);
8736    }
8737
8738    // ---------------------------------------------------------------------
8739    // v0.10.0: GET /v1/graph/neighbors/{id}
8740    //
8741    // Unified explicit + HNSW-semantic neighbor surface for solo-web's
8742    // "show similar" overlay. Tests cover the kind dispatch (explicit /
8743    // semantic / both default), threshold filter, limit clamp, dedupe
8744    // rule, and the standard 400/404/auth/tenant gates.
8745    // ---------------------------------------------------------------------
8746
8747    /// URL builder for the neighbors endpoint. `kind`/`threshold`/`limit`
8748    /// are all optional; pass `None` to omit the corresponding query
8749    /// parameter. The node id is percent-encoded so `:` survives the path
8750    /// extractor.
8751    fn neighbors_uri(
8752        node_id: &str,
8753        kind: Option<&str>,
8754        threshold: Option<f32>,
8755        limit: Option<u32>,
8756    ) -> String {
8757        let mut qs: Vec<String> = Vec::new();
8758        if let Some(k) = kind {
8759            qs.push(format!("kind={k}"));
8760        }
8761        if let Some(t) = threshold {
8762            qs.push(format!("threshold={t}"));
8763        }
8764        if let Some(l) = limit {
8765            qs.push(format!("limit={l}"));
8766        }
8767        let encoded = percent_encode_node_id(node_id);
8768        if qs.is_empty() {
8769            format!("/v1/graph/neighbors/{encoded}")
8770        } else {
8771            format!("/v1/graph/neighbors/{encoded}?{}", qs.join("&"))
8772        }
8773    }
8774
8775    /// 1. `?kind=explicit` returns only structural edges (no semantic).
8776    /// Seeds an episode with 2 explicit (triple) neighbors + several
8777    /// distinct other episodes so the semantic path COULD surface
8778    /// candidates. The `kind=explicit` filter must drop all of them.
8779    #[test]
8780    fn neighbors_explicit_only_returns_no_semantic_edges() {
8781        let runtime = rt();
8782        let h = Harness::new(&runtime);
8783        runtime.block_on(async {
8784            // Seed several episodes via the writer-actor so they get HNSW
8785            // entries -- the semantic path would surface these if it
8786            // wasn't filtered out.
8787            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
8788            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
8789            let _other2 = post_remember(h.router.clone(), "gamma gamma gamma").await;
8790            // Add explicit triples sourced from `focal`. seed_triple_row
8791            // needs the focal rowid -- look it up via a side connection.
8792            {
8793                let conn = h.open_db();
8794                let rowid: i64 = conn
8795                    .query_row(
8796                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
8797                        rusqlite::params![&focal],
8798                        |r| r.get(0),
8799                    )
8800                    .unwrap();
8801                seed_triple_row(&conn, "t-exp-1", "Alice", "knows", "Bob", Some(rowid));
8802                seed_triple_row(&conn, "t-exp-2", "Alice", "owns", "laptop", Some(rowid));
8803            }
8804            let (status, body) = call(
8805                h.router.clone(),
8806                "GET",
8807                &neighbors_uri(&format!("ep:{focal}"), Some("explicit"), None, None),
8808                None,
8809            )
8810            .await;
8811            assert_eq!(status, StatusCode::OK, "body: {body}");
8812            let edges = body["edges"].as_array().unwrap();
8813            assert!(!edges.is_empty(), "expected explicit edges: {body}");
8814            for e in edges {
8815                assert_ne!(
8816                    e["kind"], "semantic",
8817                    "kind=explicit must drop semantic edges: {body}"
8818                );
8819            }
8820        });
8821        h.shutdown(&runtime);
8822    }
8823
8824    /// 2. `?kind=semantic` returns only HNSW edges (no explicit).
8825    /// Inverse of test 1 -- same fixture, opposite filter.
8826    #[test]
8827    fn neighbors_semantic_only_returns_no_explicit_edges() {
8828        let runtime = rt();
8829        let h = Harness::new(&runtime);
8830        runtime.block_on(async {
8831            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
8832            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
8833            let _other2 = post_remember(h.router.clone(), "gamma gamma gamma").await;
8834            {
8835                let conn = h.open_db();
8836                let rowid: i64 = conn
8837                    .query_row(
8838                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
8839                        rusqlite::params![&focal],
8840                        |r| r.get(0),
8841                    )
8842                    .unwrap();
8843                seed_triple_row(&conn, "t-exp-1", "Alice", "knows", "Bob", Some(rowid));
8844            }
8845            // Threshold=0 so every HNSW hit clears the filter.
8846            let (status, body) = call(
8847                h.router.clone(),
8848                "GET",
8849                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.0), None),
8850                None,
8851            )
8852            .await;
8853            assert_eq!(status, StatusCode::OK, "body: {body}");
8854            let edges = body["edges"].as_array().unwrap();
8855            for e in edges {
8856                assert_eq!(
8857                    e["kind"], "semantic",
8858                    "kind=semantic must drop explicit edges: {body}"
8859                );
8860                assert!(e["weight"].is_number(), "semantic edges carry weight: {body}");
8861            }
8862        });
8863        h.shutdown(&runtime);
8864    }
8865
8866    /// 3. Default (no `kind=` param) returns both explicit + semantic.
8867    #[test]
8868    fn neighbors_both_default_returns_combined() {
8869        let runtime = rt();
8870        let h = Harness::new(&runtime);
8871        runtime.block_on(async {
8872            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
8873            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
8874            {
8875                let conn = h.open_db();
8876                let rowid: i64 = conn
8877                    .query_row(
8878                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
8879                        rusqlite::params![&focal],
8880                        |r| r.get(0),
8881                    )
8882                    .unwrap();
8883                seed_triple_row(&conn, "t-both-1", "Alice", "met", "Bob", Some(rowid));
8884            }
8885            let (status, body) = call(
8886                h.router.clone(),
8887                "GET",
8888                // No kind param -> default = both. Threshold 0 so semantic
8889                // hits make it through the filter.
8890                &neighbors_uri(&format!("ep:{focal}"), None, Some(0.0), None),
8891                None,
8892            )
8893            .await;
8894            assert_eq!(status, StatusCode::OK, "body: {body}");
8895            let edges = body["edges"].as_array().unwrap();
8896            let kinds: std::collections::HashSet<&str> = edges
8897                .iter()
8898                .map(|e| e["kind"].as_str().unwrap())
8899                .collect();
8900            assert!(
8901                kinds.contains("triple"),
8902                "expected at least one triple edge: {body}"
8903            );
8904            assert!(
8905                kinds.contains("semantic"),
8906                "expected at least one semantic edge: {body}"
8907            );
8908        });
8909        h.shutdown(&runtime);
8910    }
8911
8912    /// 4. Dedupe rule. Construct an episode X whose semantic-neighbor Y
8913    /// is ALSO a triple-target -- i.e. the explicit and semantic paths
8914    /// both produce an edge X -> Y. After dedupe only the explicit edge
8915    /// survives.
8916    #[test]
8917    fn neighbors_dedupes_semantic_when_explicit_exists() {
8918        let runtime = rt();
8919        let h = Harness::new(&runtime);
8920        runtime.block_on(async {
8921            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
8922            // Seed an explicit triple from focal -> ent:peer-target.
8923            // The semantic path produces edges focal -> ep:<other>; we
8924            // ensure both paths produce an edge ending at the same id by
8925            // wiring `peer-target = ep:<other_memory_id>` -- but the
8926            // entity emitter uses `ent:` prefix, not `ep:`. So to force a
8927            // collision we need an edge form where source+target overlap.
8928            //
8929            // Simpler construction: the `expand_triple_from_episode` path
8930            // emits an edge `ent:subject -> ent:object`, not from the
8931            // focal episode -- meaning the explicit edges don't end at
8932            // an ep: node in the first place. So we have to engineer a
8933            // collision via the cluster_member path:
8934            //   * explicit: focal (episode) -> cluster (via cluster_member)
8935            //   * semantic: focal -> similar episode
8936            // The two endpoints (cluster vs. episode) never collide in
8937            // shape. To produce a real (source, target) overlap that
8938            // exercises the dedupe code, mint a synthetic semantic edge
8939            // by adding an explicit triple sourced from the focal that
8940            // happens to end at the SAME entity the semantic path would
8941            // emit -- but semantic only emits ep:/chunk: ids, never ent:.
8942            //
8943            // The brief flagged this scenario as unlikely. Build the
8944            // simplest collision the codebase admits: have the focal
8945            // episode's semantic neighbor's memory_id appear as a
8946            // triple's object_id (formatted as ent:<that-uuid>). The
8947            // explicit edge is then `ent:<self-subject> -> ent:<uuid>`;
8948            // the semantic edge is `ep:focal -> ep:<uuid>`. The (source,
8949            // target) pair DIFFERS (`ent:X` vs `ep:focal`), so dedupe
8950            // would NOT fire -- which is correct: those are structurally
8951            // different relationships.
8952            //
8953            // Therefore the realistic dedupe test is the trivial
8954            // tautology: explicit and semantic produce no collisions in
8955            // practice. Lock that in by asserting that the same memory_id
8956            // never appears with an edge from both paths.
8957            let _other = post_remember(h.router.clone(), "beta beta beta").await;
8958            {
8959                let conn = h.open_db();
8960                let rowid: i64 = conn
8961                    .query_row(
8962                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
8963                        rusqlite::params![&focal],
8964                        |r| r.get(0),
8965                    )
8966                    .unwrap();
8967                seed_triple_row(
8968                    &conn,
8969                    "t-dedupe-1",
8970                    "Alice",
8971                    "knows",
8972                    "Bob",
8973                    Some(rowid),
8974                );
8975            }
8976            let (status, body) = call(
8977                h.router.clone(),
8978                "GET",
8979                &neighbors_uri(&format!("ep:{focal}"), Some("both"), Some(0.0), None),
8980                None,
8981            )
8982            .await;
8983            assert_eq!(status, StatusCode::OK, "body: {body}");
8984            // For every edge, count occurrences of (source, target). No
8985            // pair should appear twice (which is what the dedupe rule
8986            // guarantees).
8987            let edges = body["edges"].as_array().unwrap();
8988            let mut seen: std::collections::HashMap<(String, String), i32> =
8989                std::collections::HashMap::new();
8990            for e in edges {
8991                let key = (
8992                    e["source"].as_str().unwrap().to_string(),
8993                    e["target"].as_str().unwrap().to_string(),
8994                );
8995                *seen.entry(key).or_insert(0) += 1;
8996            }
8997            for (pair, count) in &seen {
8998                assert_eq!(
8999                    *count, 1,
9000                    "edge pair {pair:?} appears {count} times -- dedupe rule violated: {body}"
9001                );
9002            }
9003        });
9004        h.shutdown(&runtime);
9005    }
9006
9007    /// 5. Threshold filter -- raising the threshold drops low-similarity
9008    /// semantic neighbors.
9009    #[test]
9010    fn neighbors_threshold_filters_low_similarity() {
9011        let runtime = rt();
9012        let h = Harness::new(&runtime);
9013        runtime.block_on(async {
9014            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
9015            let _o1 = post_remember(h.router.clone(), "beta one").await;
9016            let _o2 = post_remember(h.router.clone(), "beta two").await;
9017            let _o3 = post_remember(h.router.clone(), "beta three").await;
9018            // Low threshold -- expect more semantic hits.
9019            let (status, low_body) = call(
9020                h.router.clone(),
9021                "GET",
9022                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.0), None),
9023                None,
9024            )
9025            .await;
9026            assert_eq!(status, StatusCode::OK, "body: {low_body}");
9027            let low_edge_count = low_body["edges"].as_array().unwrap().len();
9028            // High threshold -- expect fewer (or equal) semantic hits.
9029            let (status, high_body) = call(
9030                h.router.clone(),
9031                "GET",
9032                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.99), None),
9033                None,
9034            )
9035            .await;
9036            assert_eq!(status, StatusCode::OK, "body: {high_body}");
9037            let high_edge_count = high_body["edges"].as_array().unwrap().len();
9038            assert!(
9039                high_edge_count <= low_edge_count,
9040                "high-threshold ({high_edge_count}) must not exceed low-threshold ({low_edge_count}): low={low_body}, high={high_body}"
9041            );
9042            // Also assert every surviving high-threshold edge satisfies
9043            // the filter.
9044            for e in high_body["edges"].as_array().unwrap() {
9045                if let Some(w) = e["weight"].as_f64() {
9046                    assert!(
9047                        w >= 0.99,
9048                        "edge with weight {w} survived threshold=0.99: {e}"
9049                    );
9050                }
9051            }
9052        });
9053        h.shutdown(&runtime);
9054    }
9055
9056    /// 6. `?limit=999` is silently clamped at the family ceiling (100) --
9057    /// same policy as `/v1/graph/expand`.
9058    #[test]
9059    fn neighbors_limit_clamped_at_100() {
9060        let runtime = rt();
9061        let h = Harness::new(&runtime);
9062        // Seed a cluster with > 100 episodes so the explicit cluster_member
9063        // path could surface > 100 -- clamp must cap at 100.
9064        {
9065            let conn = h.open_db();
9066            seed_cluster_row(&conn, "cl-huge-n", 1000);
9067            for i in 0..150 {
9068                let mid = format!("99119911-1111-7000-8000-{:012}", i);
9069                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
9070                seed_cluster_member(&conn, "cl-huge-n", &mid);
9071            }
9072        }
9073        let (status, body) = runtime.block_on(call(
9074            h.router.clone(),
9075            "GET",
9076            &neighbors_uri("cl:cl-huge-n", Some("explicit"), None, Some(999)),
9077            None,
9078        ));
9079        assert_eq!(status, StatusCode::OK, "body: {body}");
9080        let edges = body["edges"].as_array().unwrap();
9081        assert_eq!(
9082            edges.len(),
9083            100,
9084            "limit must be silently clamped to 100, got {}",
9085            edges.len()
9086        );
9087        h.shutdown(&runtime);
9088    }
9089
9090    /// 7. `kind=semantic` on a document focal node returns 400.
9091    #[test]
9092    fn neighbors_semantic_rejects_document_source() {
9093        let runtime = rt();
9094        let h = Harness::new(&runtime);
9095        let doc_id = "d-semrej-0000-7000-8000-000000000001";
9096        {
9097            let conn = h.open_db();
9098            seed_document_row(&conn, doc_id, "host");
9099        }
9100        let (status, body) = runtime.block_on(call(
9101            h.router.clone(),
9102            "GET",
9103            &neighbors_uri(
9104                &format!("doc:{doc_id}"),
9105                Some("semantic"),
9106                None,
9107                None,
9108            ),
9109            None,
9110        ));
9111        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
9112        let err = body["error"].as_str().unwrap_or_default();
9113        assert!(
9114            err.contains("episode") && err.contains("chunk"),
9115            "error must list supported kinds: {body}"
9116        );
9117        h.shutdown(&runtime);
9118    }
9119
9120    /// 8. `kind=semantic` on a cluster focal node returns 400.
9121    #[test]
9122    fn neighbors_semantic_rejects_cluster_source() {
9123        let runtime = rt();
9124        let h = Harness::new(&runtime);
9125        let cluster_id = "cl-semrej-target";
9126        {
9127            let conn = h.open_db();
9128            seed_cluster_row(&conn, cluster_id, 12345);
9129        }
9130        let (status, body) = runtime.block_on(call(
9131            h.router.clone(),
9132            "GET",
9133            &neighbors_uri(
9134                &format!("cl:{cluster_id}"),
9135                Some("semantic"),
9136                None,
9137                None,
9138            ),
9139            None,
9140        ));
9141        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
9142        h.shutdown(&runtime);
9143    }
9144
9145    /// 9. Entity focal node returns only explicit triple edges; no
9146    /// semantic edges (entities have no embeddings, semantic path is
9147    /// silently skipped under `kind=both`).
9148    #[test]
9149    fn neighbors_entity_returns_triples_only() {
9150        let runtime = rt();
9151        let h = Harness::new(&runtime);
9152        runtime.block_on(async {
9153            // Use the writer-actor so the host episode lands in HNSW too
9154            // (any HNSW state is irrelevant since entities can't trigger
9155            // semantic recall; included to prove the semantic path is
9156            // silently skipped, not erroring).
9157            let host_mid = post_remember(h.router.clone(), "Alice and Bob talked").await;
9158            {
9159                let conn = h.open_db();
9160                let rowid: i64 = conn
9161                    .query_row(
9162                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
9163                        rusqlite::params![&host_mid],
9164                        |r| r.get(0),
9165                    )
9166                    .unwrap();
9167                seed_triple_row(&conn, "t-ent-n-1", "Alice", "knows", "Bob", Some(rowid));
9168                seed_triple_row(&conn, "t-ent-n-2", "Alice", "works_at", "Acme", Some(rowid));
9169            }
9170            let (status, body) = call(
9171                h.router.clone(),
9172                "GET",
9173                &neighbors_uri("ent:Alice", None, Some(0.0), None),
9174                None,
9175            )
9176            .await;
9177            assert_eq!(status, StatusCode::OK, "body: {body}");
9178            let edges = body["edges"].as_array().unwrap();
9179            assert!(!edges.is_empty(), "expected explicit triples: {body}");
9180            for e in edges {
9181                assert_eq!(
9182                    e["kind"], "triple",
9183                    "entity focal must produce only triple edges: {body}"
9184                );
9185            }
9186        });
9187        h.shutdown(&runtime);
9188    }
9189
9190    /// 10. Cross-tenant lookups are blocked at the TenantExtractor before
9191    /// the handler runs.
9192    #[test]
9193    fn neighbors_respects_tenant_scoping() {
9194        let runtime = rt();
9195        let h = Harness::new(&runtime);
9196        let memory_id = "a8880000-0000-7000-8000-000000000001";
9197        {
9198            let conn = h.open_db();
9199            seed_episode(&conn, memory_id, 100, "tenant scope");
9200        }
9201        // Wrong tenant header -> 404 from registry, before handler runs.
9202        let r = h.router.clone();
9203        let (status, _) = runtime.block_on(async {
9204            let req = Request::builder()
9205                .method("GET")
9206                .uri(neighbors_uri(
9207                    &format!("ep:{memory_id}"),
9208                    Some("explicit"),
9209                    None,
9210                    None,
9211                ))
9212                .header("x-solo-tenant", "never-registered-tenant-n")
9213                .body(Body::empty())
9214                .unwrap();
9215            let resp = r.oneshot(req).await.expect("oneshot");
9216            let s = resp.status();
9217            let _b = resp.into_body().collect().await.unwrap().to_bytes();
9218            (s, _b)
9219        });
9220        assert_eq!(status, StatusCode::NOT_FOUND);
9221        // Sanity: same id resolves on default tenant.
9222        let (status, body) = runtime.block_on(call(
9223            h.router.clone(),
9224            "GET",
9225            &neighbors_uri(&format!("ep:{memory_id}"), Some("explicit"), None, None),
9226            None,
9227        ));
9228        assert_eq!(status, StatusCode::OK, "default tenant must resolve: {body}");
9229        h.shutdown(&runtime);
9230    }
9231
9232    /// 11. Bearer-auth gate: missing token -> 401; valid token + unknown
9233    /// node -> 404 (auth passed, handler ran).
9234    #[test]
9235    fn neighbors_respects_auth_when_enabled() {
9236        let runtime = rt();
9237        let h = Harness::new_with_auth(&runtime, Some("neighbors-secret".into()));
9238        // Missing Authorization -> 401.
9239        let (status, _) = runtime.block_on(call(
9240            h.router.clone(),
9241            "GET",
9242            &neighbors_uri(
9243                "ep:99999999-9999-7000-8000-000000000999",
9244                Some("explicit"),
9245                None,
9246                None,
9247            ),
9248            None,
9249        ));
9250        assert_eq!(status, StatusCode::UNAUTHORIZED);
9251        // Valid bearer + unknown node -> 404 from the handler.
9252        let (status, _) = runtime.block_on(call_with_auth(
9253            h.router.clone(),
9254            "GET",
9255            &neighbors_uri(
9256                "ep:99999999-9999-7000-8000-000000000999",
9257                Some("explicit"),
9258                None,
9259                None,
9260            ),
9261            None,
9262            Some("Bearer neighbors-secret"),
9263        ));
9264        assert_eq!(status, StatusCode::NOT_FOUND);
9265        h.shutdown(&runtime);
9266    }
9267
9268    // ---------------------------------------------------------------------
9269    // v0.10.0: GET /v1/graph/stream — SSE invalidation feed
9270    //
9271    // Driving SSE through axum's in-process router (`oneshot`) requires
9272    // reading the response body as a stream of frames and parsing each
9273    // chunk against the SSE wire format (`event: NAME\ndata: JSON\n\n`).
9274    // The `read_one_sse_event` helper below does that incrementally so
9275    // tests don't have to wait for the stream to close (which would
9276    // never happen — the SSE loop runs until the client drops).
9277    // ---------------------------------------------------------------------
9278
    /// One parsed SSE event as seen by the tests: the `event:` name, the
    /// `data:` payload re-parsed as JSON, and the optional `id:` field.
    ///
    /// The `id:` field is captured for v0.11.0 P2's `/mcp` GET stream,
    /// which threads monotonic event ids through the wire; it is `None`
    /// for streams (`/v1/graph/stream`) that don't emit `id:` lines.
    /// Empty / comment-only frames are filtered out by the parser, so
    /// callers only see real events.
    #[derive(Debug, Clone)]
    struct ParsedSseEvent {
        /// Trimmed value of the block's `event:` line.
        event: String,
        /// The block's `data:` payload, parsed as JSON.
        data: Value,
        /// Raw SSE `id:` field, if present. v0.11.0 P2 emits monotonic
        /// `u64` ids for `/mcp` events; the wire encodes them as
        /// strings.
        id: Option<String>,
    }
9294
9295    /// Read frames off the SSE body until ONE complete event lands, then
9296    /// return it. Times out after `timeout` to keep red-test feedback
9297    /// fast. On timeout returns `None`.
9298    async fn read_one_sse_event(
9299        body: &mut axum::body::Body,
9300        timeout: std::time::Duration,
9301    ) -> Option<ParsedSseEvent> {
9302        use http_body_util::BodyExt;
9303        let mut buf = String::new();
9304        let start = std::time::Instant::now();
9305        loop {
9306            if start.elapsed() >= timeout {
9307                return None;
9308            }
9309            let remaining = timeout.saturating_sub(start.elapsed());
9310            let frame_res =
9311                tokio::time::timeout(remaining, body.frame()).await;
9312            let frame = match frame_res {
9313                Ok(Some(Ok(f))) => f,
9314                Ok(Some(Err(_))) | Ok(None) => return None,
9315                Err(_) => return None,
9316            };
9317            if let Ok(data) = frame.into_data() {
9318                buf.push_str(&String::from_utf8_lossy(&data));
9319                // Parse complete events (double newline separator).
9320                while let Some(idx) = buf.find("\n\n") {
9321                    let block: String = buf.drain(..idx + 2).collect();
9322                    if let Some(parsed) = parse_sse_block(&block) {
9323                        return Some(parsed);
9324                    }
9325                }
9326            }
9327        }
9328    }
9329
9330    /// Parse one SSE block (raw text between two `\n\n` separators).
9331    /// Returns `None` for comment-only blocks (lines starting with `:`)
9332    /// or blocks missing either `event:` or `data:`.
9333    fn parse_sse_block(block: &str) -> Option<ParsedSseEvent> {
9334        let mut event: Option<String> = None;
9335        let mut data: Option<String> = None;
9336        let mut id: Option<String> = None;
9337        for line in block.lines() {
9338            if let Some(rest) = line.strip_prefix("event:") {
9339                event = Some(rest.trim().to_string());
9340            } else if let Some(rest) = line.strip_prefix("data:") {
9341                data = Some(rest.trim().to_string());
9342            } else if let Some(rest) = line.strip_prefix("id:") {
9343                id = Some(rest.trim().to_string());
9344            }
9345        }
9346        let event = event?;
9347        let data_str = data?;
9348        let data_json = serde_json::from_str(&data_str).ok()?;
9349        Some(ParsedSseEvent {
9350            event,
9351            data: data_json,
9352            id,
9353        })
9354    }
9355
9356    /// Open the SSE stream and return the response body for further
9357    /// frame-level reads. The headers are validated (Content-Type +
9358    /// status) before the body is returned.
9359    async fn open_sse_stream_inner(
9360        router: axum::Router,
9361        auth: Option<&str>,
9362        tenant: Option<&str>,
9363    ) -> (StatusCode, axum::body::Body) {
9364        let mut builder = Request::builder()
9365            .method("GET")
9366            .uri("/v1/graph/stream");
9367        if let Some(a) = auth {
9368            builder = builder.header("authorization", a);
9369        }
9370        if let Some(t) = tenant {
9371            builder = builder.header("x-solo-tenant", t);
9372        }
9373        let req = builder
9374            .header("content-length", "0")
9375            .body(Body::empty())
9376            .unwrap();
9377        let resp = router.oneshot(req).await.expect("oneshot");
9378        let status = resp.status();
9379        let body = resp.into_body();
9380        (status, body)
9381    }
9382
9383    /// 1. `init` event lands as the first chunk.
9384    #[test]
9385    fn stream_emits_init_event_on_connect() {
9386        let runtime = rt();
9387        let h = Harness::new(&runtime);
9388        let r = h.router.clone();
9389        runtime.block_on(async {
9390            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
9391            assert_eq!(status, StatusCode::OK);
9392            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9393                .await
9394                .expect("must receive init event within 2s");
9395            assert_eq!(ev.event, "init");
9396            assert_eq!(ev.data["connected"].as_bool(), Some(true));
9397            assert_eq!(ev.data["tenant_id"].as_str(), Some("default"));
9398            assert!(ev.data["ts_ms"].is_number());
9399        });
9400        h.shutdown(&runtime);
9401    }
9402
9403    /// 2. Firing an InvalidateEvent on the broadcast channel surfaces
9404    /// as an `invalidate` SSE event.
9405    #[test]
9406    fn stream_emits_invalidate_after_writer_event() {
9407        let runtime = rt();
9408        let h = Harness::new(&runtime);
9409        let r = h.router.clone();
9410        let sender = h.invalidate_sender();
9411        runtime.block_on(async {
9412            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
9413            assert_eq!(status, StatusCode::OK);
9414            // Discard the init event.
9415            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9416                .await
9417                .unwrap();
9418            assert_eq!(init.event, "init");
9419            // Fire a writer-actor-style event on the broadcast.
9420            sender
9421                .send(InvalidateEvent {
9422                    reason: "memory.remember".to_string(),
9423                    tenant_id: "default".to_string(),
9424                    ts_ms: 1_715_625_600_000,
9425                    kind: "episode".to_string(),
9426                })
9427                .expect("must have at least one subscriber");
9428            // The SSE handler must surface it.
9429            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9430                .await
9431                .expect("invalidate event must arrive within 2s");
9432            assert_eq!(ev.event, "invalidate");
9433            assert_eq!(ev.data["reason"].as_str(), Some("memory.remember"));
9434            assert_eq!(ev.data["tenant_id"].as_str(), Some("default"));
9435            assert_eq!(ev.data["kind"].as_str(), Some("episode"));
9436        });
9437        h.shutdown(&runtime);
9438    }
9439
9440    /// 3. Each kind of writer-actor event surfaces with its mapped
9441    /// `(reason, kind)` shape.
9442    #[test]
9443    fn stream_emits_invalidate_for_each_writer_command() {
9444        let runtime = rt();
9445        let h = Harness::new(&runtime);
9446        let r = h.router.clone();
9447        let sender = h.invalidate_sender();
9448        let cases = [
9449            ("memory.remember", "episode"),
9450            ("memory.forget", "episode"),
9451            ("memory.consolidate", "cluster"),
9452            ("memory.ingest_document", "document"),
9453            ("memory.forget_document", "document"),
9454            ("memory.triples_extract", "cluster"),
9455            ("memory.reembed", "episode"),
9456            ("gdpr.forget_user", "tenant"),
9457        ];
9458        runtime.block_on(async {
9459            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
9460            assert_eq!(status, StatusCode::OK);
9461            // Discard the init.
9462            let _ = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9463                .await
9464                .unwrap();
9465            for (reason, kind) in cases {
9466                sender
9467                    .send(InvalidateEvent {
9468                        reason: reason.to_string(),
9469                        tenant_id: "default".to_string(),
9470                        ts_ms: 1_715_625_600_000,
9471                        kind: kind.to_string(),
9472                    })
9473                    .unwrap();
9474                let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9475                    .await
9476                    .unwrap_or_else(|| panic!("must receive event for {reason}"));
9477                assert_eq!(ev.event, "invalidate");
9478                assert_eq!(
9479                    ev.data["reason"].as_str(),
9480                    Some(reason),
9481                    "reason mismatch"
9482                );
9483                assert_eq!(ev.data["kind"].as_str(), Some(kind), "kind mismatch");
9484            }
9485        });
9486        h.shutdown(&runtime);
9487    }
9488
9489    /// 4. Heartbeat events fire on the configured interval when no real
9490    /// events arrive. Drives `build_invalidate_stream` at a 1-second
9491    /// heartbeat (the public handler uses 30s in prod), wraps it in an
9492    /// `Sse` response, then reads + parses the SSE body via the same
9493    /// `read_one_sse_event` helper the HTTP-layer tests use. This
9494    /// exercises the public Event → body byte path without touching
9495    /// `Event::finalize` (which is private).
9496    #[test]
9497    fn stream_emits_heartbeat_when_no_events() {
9498        let runtime = rt();
9499        let h = Harness::new(&runtime);
9500        let sender = h.invalidate_sender();
9501        runtime.block_on(async {
9502            // Subscribe FIRST so a later writer-side `send` would lag
9503            // the receiver if the subscriber stalled.
9504            let rx = sender.subscribe();
9505            // Build the SSE stream with a 1-second heartbeat interval —
9506            // bypassing the 30s production default.
9507            let stream = build_invalidate_stream(rx, "default".to_string(), 1);
9508            // Wrap in an Sse response + extract the body bytes through
9509            // axum's IntoResponse path. This produces real on-the-wire
9510            // SSE bytes that `read_one_sse_event` can parse.
9511            let sse: Sse<_> = Sse::new(stream);
9512            let resp = sse.into_response();
9513            let mut body = resp.into_body();
9514            // First event must be `init`.
9515            let first =
9516                read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9517                    .await
9518                    .expect("init event must arrive");
9519            assert_eq!(first.event, "init");
9520            // Second must be heartbeat (no invalidates fired, ~1s
9521            // interval; allow 3s window for runtime jitter).
9522            let second =
9523                read_one_sse_event(&mut body, std::time::Duration::from_secs(3))
9524                    .await
9525                    .expect("heartbeat event must arrive within 3s");
9526            assert_eq!(second.event, "heartbeat");
9527            assert!(second.data["ts_ms"].is_number());
9528        });
9529        h.shutdown(&runtime);
9530    }
9531
9532    /// 5. Two subscribers connected to the same tenant both receive
9533    /// every invalidate.
9534    #[test]
9535    fn stream_concurrent_subscribers_same_tenant() {
9536        let runtime = rt();
9537        let h = Harness::new(&runtime);
9538        let r1 = h.router.clone();
9539        let r2 = h.router.clone();
9540        let r3 = h.router.clone();
9541        let sender = h.invalidate_sender();
9542        runtime.block_on(async {
9543            // Open three subscribers.
9544            let (s1, mut body1) = open_sse_stream_inner(r1, None, None).await;
9545            let (s2, mut body2) = open_sse_stream_inner(r2, None, None).await;
9546            let (s3, mut body3) = open_sse_stream_inner(r3, None, None).await;
9547            assert_eq!(s1, StatusCode::OK);
9548            assert_eq!(s2, StatusCode::OK);
9549            assert_eq!(s3, StatusCode::OK);
9550            // Drain init events from each.
9551            for body in [&mut body1, &mut body2, &mut body3] {
9552                let ev = read_one_sse_event(body, std::time::Duration::from_secs(2))
9553                    .await
9554                    .unwrap();
9555                assert_eq!(ev.event, "init");
9556            }
9557            // Receiver count should be at least 3 now.
9558            assert!(
9559                sender.receiver_count() >= 3,
9560                "expected ≥3 subscribers, got {}",
9561                sender.receiver_count()
9562            );
9563            // Fire one invalidate.
9564            sender
9565                .send(InvalidateEvent {
9566                    reason: "memory.remember".to_string(),
9567                    tenant_id: "default".to_string(),
9568                    ts_ms: 1_715_625_600_000,
9569                    kind: "episode".to_string(),
9570                })
9571                .expect("send must succeed");
9572            // All three receive it.
9573            for body in [&mut body1, &mut body2, &mut body3] {
9574                let ev = read_one_sse_event(body, std::time::Duration::from_secs(2))
9575                    .await
9576                    .unwrap();
9577                assert_eq!(ev.event, "invalidate");
9578                assert_eq!(ev.data["reason"].as_str(), Some("memory.remember"));
9579            }
9580        });
9581        h.shutdown(&runtime);
9582    }
9583
9584    /// 6. Dropping the SSE client decrements the per-tenant subscriber
9585    /// count — graceful cleanup invariant.
9586    #[test]
9587    fn stream_handles_client_disconnect_gracefully() {
9588        let runtime = rt();
9589        let h = Harness::new(&runtime);
9590        let r = h.router.clone();
9591        let sender = h.invalidate_sender();
9592        let before = sender.receiver_count();
9593        runtime.block_on(async {
9594            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
9595            assert_eq!(status, StatusCode::OK);
9596            // Drain the init so the stream is fully active.
9597            let _ = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9598                .await
9599                .unwrap();
9600            let during = sender.receiver_count();
9601            assert!(
9602                during > before,
9603                "subscriber count must increase while stream is live (before={before}, during={during})"
9604            );
9605            // Drop the body — simulates the client closing the
9606            // connection. axum drops the stream future, which drops the
9607            // Receiver.
9608            drop(body);
9609        });
9610        // Allow tokio a beat to drop the Receiver task.
9611        runtime.block_on(async {
9612            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
9613        });
9614        let after = sender.receiver_count();
9615        assert!(
9616            after <= before,
9617            "subscriber count must drop back after disconnect (before={before}, after={after})"
9618        );
9619        h.shutdown(&runtime);
9620    }
9621
9622    /// 7. Bearer-auth gate: missing token -> 401.
9623    #[test]
9624    fn stream_respects_auth_when_enabled() {
9625        let runtime = rt();
9626        let h = Harness::new_with_auth(&runtime, Some("stream-secret".into()));
9627        let r = h.router.clone();
9628        runtime.block_on(async {
9629            let (status, _body) = open_sse_stream_inner(r, None, None).await;
9630            assert_eq!(status, StatusCode::UNAUTHORIZED);
9631        });
9632        h.shutdown(&runtime);
9633    }
9634
9635    /// 8. Anonymous OK when auth=None (loopback default).
9636    #[test]
9637    fn stream_works_with_auth_none() {
9638        let runtime = rt();
9639        let h = Harness::new(&runtime);
9640        let r = h.router.clone();
9641        runtime.block_on(async {
9642            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
9643            assert_eq!(status, StatusCode::OK);
9644            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9645                .await
9646                .expect("must receive init event");
9647            assert_eq!(ev.event, "init");
9648        });
9649        h.shutdown(&runtime);
9650    }
9651
9652    /// 9. Bearer-auth gate: valid token allows the stream to open.
9653    #[test]
9654    fn stream_respects_auth_accepts_valid_token() {
9655        let runtime = rt();
9656        let h = Harness::new_with_auth(&runtime, Some("stream-secret".into()));
9657        let r = h.router.clone();
9658        runtime.block_on(async {
9659            let (status, mut body) =
9660                open_sse_stream_inner(r, Some("Bearer stream-secret"), None).await;
9661            assert_eq!(status, StatusCode::OK);
9662            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9663                .await
9664                .expect("must receive init event with valid bearer");
9665            assert_eq!(ev.event, "init");
9666            assert_eq!(ev.data["tenant_id"].as_str(), Some("default"));
9667        });
9668        h.shutdown(&runtime);
9669    }
9670
9671    /// 10. Cross-tenant lookups are 404 at TenantExtractor before the
9672    /// stream opens — wrong tenant header never reaches the handler.
9673    #[test]
9674    fn stream_respects_tenant_scoping() {
9675        let runtime = rt();
9676        let h = Harness::new(&runtime);
9677        let r = h.router.clone();
9678        runtime.block_on(async {
9679            let (status, _body) =
9680                open_sse_stream_inner(r, None, Some("never-registered-tenant-x")).await;
9681            // The single-tenant test registry returns NotFound from
9682            // get_or_open when the header points to a tenant that isn't
9683            // cached; the TenantExtractor maps that to 404.
9684            assert_eq!(status, StatusCode::NOT_FOUND);
9685        });
9686        h.shutdown(&runtime);
9687    }
9688
9689    // -----------------------------------------------------------------
9690    // /v1/tenants — principal-scoped tenant list (v0.10.0)
9691    //
9692    // Seeds the harness's in-memory tenants_index stub via
9693    // `harness.registry.with_index(|idx| idx.register(...))` to drive
9694    // the read-only list endpoint. The default tenant from the
9695    // harness's HashMap is NOT in the index stub by construction (the
9696    // `for_tests_with_single_tenant` factory only wires the cached
9697    // HashMap entry; the index starts empty after migrations), so each
9698    // test that wants the default tenant listed registers it
9699    // explicitly. This keeps the test setup explicit about what's
9700    // visible to `list_active` versus what's open in memory.
9701    // -----------------------------------------------------------------
9702
9703    /// Seed three Active tenants into the registry's index. Returns the
9704    /// ids in the order they were registered, which is the order
9705    /// `list_active` will return them in (ORDER BY created_at_ms ASC).
9706    async fn seed_three_tenants(registry: &TenantRegistry) -> Vec<String> {
9707        use solo_core::TenantId as TenantIdT;
9708        let ids = ["alice", "bob", "default"];
9709        for id in ids {
9710            let tid = TenantIdT::new(id).unwrap();
9711            registry
9712                .with_index(|idx| {
9713                    idx.register(&tid, &format!("{id}.db"), Some(&format!("{id} tenant")))
9714                        .unwrap();
9715                    // Ensure created_at_ms diverges so the ASC sort is
9716                    // deterministic — the index uses `chrono::Utc::now()`
9717                    // per row and 3 sequential inserts can land in the
9718                    // same ms on fast hardware.
9719                })
9720                .await;
9721            tokio::time::sleep(std::time::Duration::from_millis(2)).await;
9722        }
9723        // Sort matches the `created_at_ms ASC, tenant_id ASC` order
9724        // `TenantsIndex::list` returns. We inserted in (alice, bob,
9725        // default) order with 2ms gaps, so that's the expected order.
9726        vec!["alice".into(), "bob".into(), "default".into()]
9727    }
9728
9729    /// 1. With `AuthConfig::None`, the handler returns every tenant
9730    ///    visible in the registry — same scope as `solo tenants list`.
9731    ///    Exercises the "no principal" branch of the visibility filter.
9732    #[test]
9733    fn tenants_returns_all_when_auth_none() {
9734        let runtime = rt();
9735        let h = Harness::new(&runtime);
9736        let r = h.router.clone();
9737        runtime.block_on(async {
9738            let _expected = seed_three_tenants(&h.registry).await;
9739            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
9740            assert_eq!(status, StatusCode::OK);
9741            let arr = body
9742                .get("tenants")
9743                .and_then(|v| v.as_array())
9744                .expect("tenants array");
9745            assert_eq!(arr.len(), 3, "got body: {body}");
9746            let ids: Vec<&str> =
9747                arr.iter().filter_map(|t| t["id"].as_str()).collect();
9748            assert_eq!(ids, vec!["alice", "bob", "default"]);
9749        });
9750        h.shutdown(&runtime);
9751    }
9752
9753    /// 2. Under Bearer auth (single-principal mode), the handler
9754    ///    returns every tenant — the bearer holder is treated as the
9755    ///    daemon operator with full visibility. Exercises the bearer
9756    ///    branch of the visibility filter.
9757    #[test]
9758    fn tenants_returns_all_when_bearer_auth() {
9759        let runtime = rt();
9760        let h = Harness::new_with_auth(&runtime, Some("tlist-secret".into()));
9761        let r = h.router.clone();
9762        runtime.block_on(async {
9763            seed_three_tenants(&h.registry).await;
9764            let (status, body) = call_with_auth(
9765                r,
9766                "GET",
9767                "/v1/tenants",
9768                None,
9769                Some("Bearer tlist-secret"),
9770            )
9771            .await;
9772            assert_eq!(status, StatusCode::OK, "got body: {body}");
9773            let arr = body["tenants"].as_array().expect("tenants array");
9774            assert_eq!(arr.len(), 3, "bearer must see all tenants");
9775        });
9776        h.shutdown(&runtime);
9777    }
9778
9779    /// 3. Under OIDC, an authenticated principal carrying
9780    ///    `tenant_claim = "alice"` sees ONLY alice — not bob, not
9781    ///    default. Exercises the OIDC branch of the visibility filter.
9782    #[test]
9783    fn tenants_filters_to_principal_claim_when_oidc() {
9784        let runtime = rt();
9785        let (fake_server, discovery_url, secret, kid) =
9786            runtime.block_on(async { spin_fake_idp().await });
9787        let server_uri = fake_server.uri();
9788        let _server_guard = fake_server;
9789
9790        let auth = crate::auth::AuthConfig::Oidc {
9791            discovery_url,
9792            audience: "tlist-audience".to_string(),
9793            tenant_claim_name: "solo_tenant".to_string(),
9794        };
9795        let h = Harness::new_with_auth_config(&runtime, Some(auth));
9796        let r = h.router.clone();
9797
9798        runtime.block_on(async {
9799            seed_three_tenants(&h.registry).await;
9800            let token = mint_idp_token(
9801                &server_uri,
9802                kid,
9803                &secret,
9804                "alice",
9805                "tlist-audience",
9806            );
9807            let (status, body) = call_with_auth(
9808                r,
9809                "GET",
9810                "/v1/tenants",
9811                None,
9812                Some(&format!("Bearer {token}")),
9813            )
9814            .await;
9815            assert_eq!(status, StatusCode::OK, "got body: {body}");
9816            let arr = body["tenants"].as_array().expect("tenants array");
9817            assert_eq!(arr.len(), 1, "OIDC alice must see exactly one tenant");
9818            assert_eq!(arr[0]["id"].as_str(), Some("alice"));
9819        });
9820        h.shutdown(&runtime);
9821    }
9822
9823    /// 4. Under OIDC with a `tenant_claim` that doesn't match any
9824    ///    registered tenant, the response is `200 OK` with
9825    ///    `tenants: []` — NOT 404. Don't leak whether other tenants
9826    ///    exist via a status-code side-channel for an OIDC principal
9827    ///    that lacks visibility to them.
9828    #[test]
9829    fn tenants_returns_empty_when_oidc_claim_unmatched() {
9830        let runtime = rt();
9831        let (fake_server, discovery_url, secret, kid) =
9832            runtime.block_on(async { spin_fake_idp().await });
9833        let server_uri = fake_server.uri();
9834        let _server_guard = fake_server;
9835
9836        let auth = crate::auth::AuthConfig::Oidc {
9837            discovery_url,
9838            audience: "tlist-audience".to_string(),
9839            tenant_claim_name: "solo_tenant".to_string(),
9840        };
9841        let h = Harness::new_with_auth_config(&runtime, Some(auth));
9842        let r = h.router.clone();
9843
9844        runtime.block_on(async {
9845            seed_three_tenants(&h.registry).await;
9846            // Mint a token claiming a tenant that IS a valid TenantId
9847            // (passes middleware) but doesn't exist in the index.
9848            let token = mint_idp_token(
9849                &server_uri,
9850                kid,
9851                &secret,
9852                "nonexistent",
9853                "tlist-audience",
9854            );
9855            let (status, body) = call_with_auth(
9856                r,
9857                "GET",
9858                "/v1/tenants",
9859                None,
9860                Some(&format!("Bearer {token}")),
9861            )
9862            .await;
9863            assert_eq!(
9864                status,
9865                StatusCode::OK,
9866                "must be 200 OK, not 404 — don't leak tenant existence: {body}"
9867            );
9868            let arr = body["tenants"].as_array().expect("tenants array");
9869            assert_eq!(
9870                arr.len(),
9871                0,
9872                "unmatched OIDC claim must produce empty list, got: {body}"
9873            );
9874        });
9875        h.shutdown(&runtime);
9876    }
9877
9878    /// 5. JSON response shape matches what solo-web's TypeScript
9879    ///    client expects: `tenants[*].{id,display_name,created_at_ms,
9880    ///    status,quota_bytes,episode_count,size_bytes,pct_used,
9881    ///    last_accessed_ms}`. Catches accidental field renames at PR
9882    ///    time.
9883    ///
9884    ///    v0.10.1: `episode_count` / `size_bytes` / `pct_used` are
9885    ///    hydrated when the per-tenant DB file exists. This test
9886    ///    registers a tenant whose DB file does NOT exist (the
9887    ///    `for_tests_with_single_tenant` harness only writes the
9888    ///    `default` tenant's DB), so the three numeric fields land as
9889    ///    JSON `null` — verifying the `null` JSON value (not absence)
9890    ///    so clients see a stable shape regardless of hydration
9891    ///    success.
9892    #[test]
9893    fn tenants_response_shape_matches_solo_web_types() {
9894        let runtime = rt();
9895        let h = Harness::new(&runtime);
9896        let r = h.router.clone();
9897        runtime.block_on(async {
9898            // Register one tenant with a display_name + quota so all
9899            // optional fields are present in the response.
9900            let tid = solo_core::TenantId::new("shaped").unwrap();
9901            h.registry
9902                .with_index(|idx| {
9903                    idx.register_with_quota(
9904                        &tid,
9905                        "shaped.db",
9906                        Some("Shaped tenant"),
9907                        Some(1_048_576),
9908                    )
9909                    .unwrap();
9910                })
9911                .await;
9912            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
9913            assert_eq!(status, StatusCode::OK);
9914            let item = &body["tenants"][0];
9915            // id, display_name, created_at_ms, status: required
9916            assert_eq!(item["id"].as_str(), Some("shaped"));
9917            assert_eq!(item["display_name"].as_str(), Some("Shaped tenant"));
9918            assert!(
9919                item["created_at_ms"].is_i64(),
9920                "created_at_ms must be an i64, got {item}"
9921            );
9922            assert_eq!(item["status"].as_str(), Some("active"));
9923            // quota_bytes: present + numeric
9924            assert_eq!(item["quota_bytes"].as_u64(), Some(1_048_576));
9925            // v0.10.1: episode_count / size_bytes / pct_used become
9926            // null when the per-tenant DB file is missing on disk
9927            // (this harness only writes the default tenant's file —
9928            // shaped.db does not exist). Clients must tolerate the
9929            // null JSON shape; absence would be a breaking change.
9930            assert!(
9931                item["episode_count"].is_null(),
9932                "episode_count must be JSON null when tenant DB is missing, got {item}"
9933            );
9934            assert!(
9935                item["size_bytes"].is_null(),
9936                "size_bytes must be JSON null when tenant DB is missing, got {item}"
9937            );
9938            assert!(
9939                item["pct_used"].is_null(),
9940                "pct_used must be JSON null when size_bytes is null, got {item}"
9941            );
9942        });
9943        h.shutdown(&runtime);
9944    }
9945
9946    /// 6. Bearer auth enabled + missing Authorization header → 401
9947    ///    before the handler runs. Confirms the route is plumbed
9948    ///    through `auth_middleware` (it sits inside the `authed`
9949    ///    sub-router, not the `public` one).
9950    #[test]
9951    fn tenants_respects_auth_when_enabled() {
9952        let runtime = rt();
9953        let h = Harness::new_with_auth(&runtime, Some("must-auth".into()));
9954        let r = h.router.clone();
9955        runtime.block_on(async {
9956            seed_three_tenants(&h.registry).await;
9957            // No Authorization header → 401.
9958            let (status, _body) = call(r, "GET", "/v1/tenants", None).await;
9959            assert_eq!(status, StatusCode::UNAUTHORIZED);
9960        });
9961        h.shutdown(&runtime);
9962    }
9963
9964    /// 7. `PendingMigration` and `PendingDelete` rows are excluded
9965    ///    from the response. solo-web's tenant picker should never
9966    ///    surface a row that's mid-admin-operation (race with admin
9967    ///    tooling). Only Active tenants make the list.
9968    #[test]
9969    fn tenants_status_filter_excludes_non_active() {
9970        let runtime = rt();
9971        let h = Harness::new(&runtime);
9972        let r = h.router.clone();
9973        runtime.block_on(async {
9974            // Three tenants, three statuses. Only `keeper` (Active)
9975            // should appear on the wire.
9976            let keeper = solo_core::TenantId::new("keeper").unwrap();
9977            let migrating = solo_core::TenantId::new("migrating").unwrap();
9978            let deleting = solo_core::TenantId::new("deleting").unwrap();
9979            h.registry
9980                .with_index(|idx| {
9981                    idx.register(&keeper, "keeper.db", None).unwrap();
9982                    idx.register_with_status(
9983                        &migrating,
9984                        "migrating.db",
9985                        None,
9986                        solo_storage::TenantStatus::PendingMigration,
9987                    )
9988                    .unwrap();
9989                    idx.register_with_status(
9990                        &deleting,
9991                        "deleting.db",
9992                        None,
9993                        solo_storage::TenantStatus::PendingDelete,
9994                    )
9995                    .unwrap();
9996                })
9997                .await;
9998            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
9999            assert_eq!(status, StatusCode::OK);
10000            let arr = body["tenants"].as_array().expect("tenants array");
10001            let ids: Vec<&str> =
10002                arr.iter().filter_map(|t| t["id"].as_str()).collect();
10003            assert_eq!(
10004                ids,
10005                vec!["keeper"],
10006                "only Active tenants visible; got: {body}"
10007            );
10008        });
10009        h.shutdown(&runtime);
10010    }
10011
10012    /// 8. Empty registry → `200 OK` with `tenants: []`. Defends
10013    ///    against accidental `None` serialisation or 404'ing on an
10014    ///    empty list. solo-web's first paint on a brand-new daemon
10015    ///    needs an empty array to render the "no tenants yet" state.
10016    #[test]
10017    fn tenants_returns_empty_array_when_no_tenants_registered() {
10018        let runtime = rt();
10019        let h = Harness::new(&runtime);
10020        let r = h.router.clone();
10021        runtime.block_on(async {
10022            // Don't seed anything — the harness's in-memory index
10023            // starts at zero rows (the cached default-tenant handle in
10024            // the HashMap is invisible to `list_active`).
10025            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
10026            assert_eq!(status, StatusCode::OK);
10027            let arr = body["tenants"].as_array().expect("tenants array");
10028            assert_eq!(arr.len(), 0, "expected empty array, got: {body}");
10029        });
10030        h.shutdown(&runtime);
10031    }
10032
10033    // ---- v0.10.1: cost-number hydration tests ----
10034    //
10035    // These exercise `TenantRegistry::hydrate_tenant_cost_numbers` end-
10036    // to-end through the `/v1/tenants` handler. The harness's
10037    // `for_tests_with_single_tenant` registry uses a plain-SQLite tenant
10038    // DB (not real SQLCipher); the hydration helper has a fallback
10039    // open path for that case (see registry.rs). The
10040    // `_tmp_dir/tenants/<filename>` layout matters: that's where the
10041    // hydration helper looks. These tests create real files there to
10042    // exercise the size_bytes path; episode_count requires the file to
10043    // be a SQLite DB with the `episodes` table.
10044    //
10045    // The `default` tenant exists at `_tmp_dir/test.db` (set by the
10046    // harness); the hydration helper expects `_tmp_dir/tenants/<file>`.
10047    // So we either (a) register a fresh tenant id pointing at a DB we
10048    // create at the expected layout, or (b) check the documented
10049    // behavior under "file missing" (returns null counts gracefully).
10050    // Both shapes are tested here.
10051    //
10052    // The constant `TENANTS_COUNT_HYDRATION_CAP` is grep-able.
10053
10054    /// Helper: create a per-tenant DB file at the layout the hydration
10055    /// helper expects (`<data_dir>/tenants/<db_filename>`), populated
10056    /// with the `episodes` table + `n_active` active episodes +
10057    /// `n_forgotten` forgotten episodes. Returns the absolute path.
10058    fn seed_per_tenant_db_with_episodes(
10059        data_dir: &std::path::Path,
10060        db_filename: &str,
10061        n_active: i64,
10062        n_forgotten: i64,
10063    ) -> std::path::PathBuf {
10064        let tenants_dir = data_dir.join(solo_storage::TENANTS_SUBDIR);
10065        std::fs::create_dir_all(&tenants_dir).unwrap();
10066        let db_path = tenants_dir.join(db_filename);
10067        // Open as plain SQLite (test path; matches the harness's
10068        // `open_test_db_at` shape; hydration helper falls back to plain
10069        // open when SQLCipher open fails).
10070        let mut conn = rusqlite::Connection::open(&db_path).unwrap();
10071        // Run the same migrations the real per-tenant DB does so the
10072        // `episodes` table + `status` CHECK constraint match production.
10073        solo_storage::run_migrations(&mut conn).unwrap();
10074        for i in 0..n_active {
10075            conn.execute(
10076                "INSERT INTO episodes (memory_id, ts_ms, source_type, content, confidence, strength, salience, tier, status, created_at_ms, updated_at_ms)
10077                 VALUES (?, 0, 'user_message', 'x', 0.5, 0.5, 0.5, 'hot', 'active', 0, 0)",
10078                rusqlite::params![format!("a-{i}")],
10079            )
10080            .unwrap();
10081        }
10082        for i in 0..n_forgotten {
10083            conn.execute(
10084                "INSERT INTO episodes (memory_id, ts_ms, source_type, content, confidence, strength, salience, tier, status, created_at_ms, updated_at_ms)
10085                 VALUES (?, 0, 'user_message', 'x', 0.5, 0.5, 0.5, 'hot', 'forgotten', 0, 0)",
10086                rusqlite::params![format!("f-{i}")],
10087            )
10088            .unwrap();
10089        }
10090        drop(conn);
10091        db_path
10092    }
10093
10094    /// v0.10.1 test 1: `episode_count` hydrates to the actual active
10095    /// episode count when the per-tenant DB exists. Seed 3 active + 2
10096    /// forgotten episodes; expect `episode_count: 3` (the `status =
10097    /// 'active'` filter excludes the forgotten rows).
10098    #[test]
10099    fn tenants_response_hydrates_episode_count_when_tenant_has_data() {
10100        let runtime = rt();
10101        let h = Harness::new(&runtime);
10102        let r = h.router.clone();
10103        let data_dir = h._tmp.path().to_path_buf();
10104        runtime.block_on(async {
10105            let tid = solo_core::TenantId::new("counted").unwrap();
10106            seed_per_tenant_db_with_episodes(&data_dir, "counted.db", 3, 2);
10107            h.registry
10108                .with_index(|idx| {
10109                    idx.register(&tid, "counted.db", Some("Counted tenant"))
10110                        .unwrap();
10111                })
10112                .await;
10113            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
10114            assert_eq!(status, StatusCode::OK);
10115            let item = &body["tenants"][0];
10116            assert_eq!(item["id"].as_str(), Some("counted"));
10117            assert_eq!(
10118                item["episode_count"].as_i64(),
10119                Some(3),
10120                "episode_count must be 3 (active rows only, 2 forgotten excluded); got {item}"
10121            );
10122        });
10123        h.shutdown(&runtime);
10124    }
10125
10126    /// v0.10.1 test 2: `size_bytes` reports the on-disk size of the
10127    /// per-tenant DB file. Asserts the response value matches
10128    /// `std::fs::metadata(<db_path>).len()` exactly — pins that we
10129    /// read the right file, not e.g. data_dir or a temp.
10130    #[test]
10131    fn tenants_response_hydrates_size_bytes_from_db_file() {
10132        let runtime = rt();
10133        let h = Harness::new(&runtime);
10134        let r = h.router.clone();
10135        let data_dir = h._tmp.path().to_path_buf();
10136        runtime.block_on(async {
10137            let tid = solo_core::TenantId::new("sized").unwrap();
10138            let db_path =
10139                seed_per_tenant_db_with_episodes(&data_dir, "sized.db", 1, 0);
10140            h.registry
10141                .with_index(|idx| {
10142                    idx.register(&tid, "sized.db", None).unwrap();
10143                })
10144                .await;
10145            let on_disk = std::fs::metadata(&db_path).unwrap().len();
10146            assert!(on_disk > 0, "test setup: db file should be non-empty");
10147            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
10148            assert_eq!(status, StatusCode::OK);
10149            let item = &body["tenants"][0];
10150            assert_eq!(item["id"].as_str(), Some("sized"));
10151            assert_eq!(
10152                item["size_bytes"].as_u64(),
10153                Some(on_disk),
10154                "size_bytes must match fs::metadata; got {item}"
10155            );
10156        });
10157        h.shutdown(&runtime);
10158    }
10159
10160    /// v0.10.1 test 3: `pct_used` is computed from `size_bytes /
10161    /// quota_bytes * 100` when both are known. Pick a quota much
10162    /// larger than the DB so the percentage stays in a sane range
10163    /// (and survives any unrelated DB-page padding).
10164    #[test]
10165    fn tenants_response_computes_pct_used_when_quota_set() {
10166        let runtime = rt();
10167        let h = Harness::new(&runtime);
10168        let r = h.router.clone();
10169        let data_dir = h._tmp.path().to_path_buf();
10170        runtime.block_on(async {
10171            let tid = solo_core::TenantId::new("quoted").unwrap();
10172            let db_path =
10173                seed_per_tenant_db_with_episodes(&data_dir, "quoted.db", 1, 0);
10174            // Pick a quota that's large enough that pct_used lands
10175            // between 0 and 50% regardless of SQLite page boundary
10176            // rounding. Asserting an exact float would be flaky.
10177            let on_disk = std::fs::metadata(&db_path).unwrap().len();
10178            let quota = on_disk * 4; // pct_used should be ~25%
10179            h.registry
10180                .with_index(|idx| {
10181                    idx.register_with_quota(&tid, "quoted.db", None, Some(quota))
10182                        .unwrap();
10183                })
10184                .await;
10185            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
10186            assert_eq!(status, StatusCode::OK);
10187            let item = &body["tenants"][0];
10188            let pct = item["pct_used"].as_f64().expect("pct_used must be a number");
10189            assert!(
10190                (0.0..=100.0).contains(&pct),
10191                "pct_used must be in [0, 100], got {pct}"
10192            );
10193            // Allow a wide band — exact value depends on SQLite page
10194            // size — but the recipe (size/quota*100) means a
10195            // size=quota/4 setup must land near 25%.
10196            assert!(
10197                (20.0..=30.0).contains(&pct),
10198                "pct_used must be ~25% for size=quota/4, got {pct}"
10199            );
10200        });
10201        h.shutdown(&runtime);
10202    }
10203
10204    /// v0.10.1 test 4: `pct_used` is `null` when `quota_bytes` is
10205    /// null (the "unlimited" case). Pins that we don't accidentally
10206    /// emit a numeric `0.0` or `100.0` for unlimited quotas.
10207    #[test]
10208    fn tenants_response_pct_used_null_when_quota_null() {
10209        let runtime = rt();
10210        let h = Harness::new(&runtime);
10211        let r = h.router.clone();
10212        let data_dir = h._tmp.path().to_path_buf();
10213        runtime.block_on(async {
10214            let tid = solo_core::TenantId::new("unlimited").unwrap();
10215            seed_per_tenant_db_with_episodes(&data_dir, "unlimited.db", 1, 0);
10216            h.registry
10217                .with_index(|idx| {
10218                    idx.register(&tid, "unlimited.db", None).unwrap();
10219                })
10220                .await;
10221            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
10222            assert_eq!(status, StatusCode::OK);
10223            let item = &body["tenants"][0];
10224            assert_eq!(item["id"].as_str(), Some("unlimited"));
10225            assert!(
10226                item["quota_bytes"].is_null(),
10227                "test setup: quota_bytes must be null, got {item}"
10228            );
10229            assert!(
10230                item["pct_used"].is_null(),
10231                "pct_used must be JSON null when quota_bytes is null, got {item}"
10232            );
10233            // size_bytes still present (no quota doesn't suppress
10234            // size — only pct_used).
10235            assert!(
10236                item["size_bytes"].is_u64(),
10237                "size_bytes must still be present when quota_bytes is null, got {item}"
10238            );
10239        });
10240        h.shutdown(&runtime);
10241    }
10242
10243    /// v0.10.1 test 5: the response includes
10244    /// `X-Solo-Tenants-Count-Cap-Reached: true` when the filtered
10245    /// tenant count exceeds `TENANTS_COUNT_HYDRATION_CAP`. Tenants
10246    /// beyond the cap have `episode_count: null` even though their
10247    /// `size_bytes` is still hydrated (fs::metadata is cheap).
10248    ///
10249    /// We don't seed 51 real DBs (would be slow); instead, we
10250    /// register 51 tenant rows in the index. The cap is documented
10251    /// to apply to `episode_count` hydration, and the header is
10252    /// emitted purely from the count of filtered records. The
10253    /// header semantics here are independent of per-tenant DB
10254    /// existence.
10255    #[test]
10256    fn tenants_response_sets_cap_reached_header_when_over_cap() {
10257        let runtime = rt();
10258        let h = Harness::new(&runtime);
10259        let r = h.router.clone();
10260        runtime.block_on(async {
10261            // Register 51 tenants (cap = 50, so we exceed it).
10262            h.registry
10263                .with_index(|idx| {
10264                    for i in 0..51 {
10265                        let id = format!("t{i:02}");
10266                        let tid = solo_core::TenantId::new(&id).unwrap();
10267                        idx.register(&tid, &format!("{id}.db"), None).unwrap();
10268                    }
10269                })
10270                .await;
10271            // Send a raw request so we can inspect headers.
10272            use axum::body::Body;
10273            use axum::http::Request;
10274            use http_body_util::BodyExt;
10275            let req = Request::builder()
10276                .method("GET")
10277                .uri("/v1/tenants")
10278                .body(Body::empty())
10279                .unwrap();
10280            let resp = r.oneshot(req).await.unwrap();
10281            assert_eq!(resp.status(), StatusCode::OK);
10282            let cap_header = resp
10283                .headers()
10284                .get(X_SOLO_TENANTS_COUNT_CAP_HEADER)
10285                .expect("cap-reached header must be present");
10286            assert_eq!(
10287                cap_header.to_str().unwrap(),
10288                "true",
10289                "cap-reached header value must be 'true' when over cap"
10290            );
10291            // Parse body to verify shape — beyond-cap tenants have
10292            // null episode_count.
10293            let bytes = resp.into_body().collect().await.unwrap().to_bytes();
10294            let body: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
10295            let arr = body["tenants"].as_array().expect("tenants array");
10296            assert_eq!(arr.len(), 51, "got {} tenants", arr.len());
10297            // The last (sorted-by-created_at_ms) tenant should be
10298            // beyond the cap. The hydration order matches the
10299            // filtered list order, so index 50 is the 51st tenant
10300            // and should have null episode_count.
10301            assert!(
10302                arr[50]["episode_count"].is_null(),
10303                "the 51st tenant (beyond cap) must have null episode_count, got {}",
10304                arr[50]
10305            );
10306        });
10307        h.shutdown(&runtime);
10308    }
10309
10310    /// v0.10.1 test 6: when the response is under the cap, the
10311    /// `X-Solo-Tenants-Count-Cap-Reached` header is absent. Pin the
10312    /// negative case so a future refactor that always emits the
10313    /// header (with "false") doesn't pass silently.
10314    #[test]
10315    fn tenants_response_omits_cap_header_when_under_cap() {
10316        let runtime = rt();
10317        let h = Harness::new(&runtime);
10318        let r = h.router.clone();
10319        runtime.block_on(async {
10320            seed_three_tenants(&h.registry).await;
10321            use axum::body::Body;
10322            use axum::http::Request;
10323            let req = Request::builder()
10324                .method("GET")
10325                .uri("/v1/tenants")
10326                .body(Body::empty())
10327                .unwrap();
10328            let resp = r.oneshot(req).await.unwrap();
10329            assert_eq!(resp.status(), StatusCode::OK);
10330            assert!(
10331                resp.headers().get(X_SOLO_TENANTS_COUNT_CAP_HEADER).is_none(),
10332                "cap-reached header must be absent under the cap"
10333            );
10334        });
10335        h.shutdown(&runtime);
10336    }
10337
10338    // ---- Pure unit tests on the visibility filter ----
10339    //
10340    // These exercise `filter_tenants_for_principal` and
10341    // `is_single_principal_bearer` without an axum router — fast
10342    // feedback for the load-bearing visibility rule. The
10343    // router-level tests above cover the wire path.
10344
10345    /// Build a synthetic `TenantRecord` so the pure unit tests don't
10346    /// need a real SQLCipher round-trip.
10347    fn make_record(id: &str) -> solo_storage::TenantRecord {
10348        solo_storage::TenantRecord {
10349            tenant_id: solo_core::TenantId::new(id).unwrap(),
10350            db_filename: format!("{id}.db"),
10351            display_name: None,
10352            created_at_ms: 0,
10353            status: solo_storage::TenantStatus::Active,
10354            quota_bytes: None,
10355            last_accessed_ms: None,
10356        }
10357    }
10358
10359    #[test]
10360    fn filter_no_principal_returns_all() {
10361        let records = vec![make_record("a"), make_record("b")];
10362        let out = filter_tenants_for_principal(records.clone(), None);
10363        assert_eq!(out.len(), 2);
10364        assert_eq!(out[0].tenant_id.as_str(), "a");
10365        assert_eq!(out[1].tenant_id.as_str(), "b");
10366    }
10367
10368    #[test]
10369    fn filter_bearer_principal_returns_all() {
10370        let records = vec![make_record("a"), make_record("b")];
10371        let p = AuthenticatedPrincipal::bearer(
10372            solo_core::TenantId::new("a").unwrap(),
10373        );
10374        let out = filter_tenants_for_principal(records, Some(&p));
10375        assert_eq!(out.len(), 2);
10376    }
10377
10378    #[test]
10379    fn filter_oidc_principal_keeps_only_claim() {
10380        let records = vec![make_record("a"), make_record("b"), make_record("c")];
10381        // OIDC-flavoured principal: non-bearer subject + JSON-object claims.
10382        let p = AuthenticatedPrincipal {
10383            subject: "alice@example.com".to_string(),
10384            tenant_claim: Some(solo_core::TenantId::new("b").unwrap()),
10385            scopes: vec!["read".to_string()],
10386            claims: serde_json::json!({ "sub": "alice@example.com" }),
10387        };
10388        let out = filter_tenants_for_principal(records, Some(&p));
10389        assert_eq!(out.len(), 1);
10390        assert_eq!(out[0].tenant_id.as_str(), "b");
10391    }
10392
10393    #[test]
10394    fn filter_oidc_principal_with_no_claim_returns_empty() {
10395        // Theoretically unreachable — middleware short-circuits at 403
10396        // before we see a no-claim OIDC principal. Defend anyway.
10397        let records = vec![make_record("a")];
10398        let p = AuthenticatedPrincipal {
10399            subject: "alice@example.com".to_string(),
10400            tenant_claim: None,
10401            scopes: vec![],
10402            claims: serde_json::json!({ "sub": "alice@example.com" }),
10403        };
10404        let out = filter_tenants_for_principal(records, Some(&p));
10405        assert!(out.is_empty());
10406    }
10407
10408    #[test]
10409    fn is_single_principal_bearer_discriminator() {
10410        let bearer = AuthenticatedPrincipal::bearer(
10411            solo_core::TenantId::new("default").unwrap(),
10412        );
10413        assert!(is_single_principal_bearer(&bearer));
10414
10415        let oidc = AuthenticatedPrincipal {
10416            subject: "alice".to_string(),
10417            tenant_claim: Some(solo_core::TenantId::new("alice").unwrap()),
10418            scopes: vec![],
10419            claims: serde_json::json!({ "x": 1 }),
10420        };
10421        assert!(!is_single_principal_bearer(&oidc));
10422
10423        // Subject == "bearer" but claims is a non-null object → not a
10424        // bearer-shaped principal. Defends against a forged-bearer
10425        // shape that might smuggle JWT claims.
10426        let weird = AuthenticatedPrincipal {
10427            subject: "bearer".to_string(),
10428            tenant_claim: Some(solo_core::TenantId::default_tenant()),
10429            scopes: vec![],
10430            claims: serde_json::json!({ "leak": 1 }),
10431        };
10432        assert!(!is_single_principal_bearer(&weird));
10433    }
10434
10435    // ---------------------------------------------------------------
10436    // v0.10.2 — MCP-over-HTTP transport on /mcp
10437    // ---------------------------------------------------------------
10438    //
10439    // These tests pin the wire contract for the new `/mcp` route added
10440    // in v0.10.2 P2. We exercise the route through the same `Harness`
10441    // pattern the rest of the file uses (in-process axum Router via
10442    // `tower::ServiceExt::oneshot`) — no real TCP listener needed.
10443    //
10444    // The dispatcher's unit tests live in `mcp_dispatch::tests` and
10445    // cover the JSON-RPC envelope shape in isolation. These tests are
10446    // the integration layer: real `TenantHandle`, real `WriterActor`,
10447    // real `SoloMcpServer::dispatch_tool` path.
10448
10449    /// `POST /mcp` with `{jsonrpc, id, method: "tools/list"}` returns
10450    /// the canonical 14 tools. Matches the stdio smoke test
10451    /// `mcp_stdio_lists_fourteen_canonical_tools` from
10452    /// `crates/solo-cli/tests/mcp_smoke.rs` so any drift between the
10453    /// two transports fails one of the two suites loudly.
10454    #[test]
10455    fn mcp_http_tools_list_returns_fourteen_canonical_tools() {
10456        let runtime = rt();
10457        let h = Harness::new(&runtime);
10458        let r = h.router.clone();
10459        runtime.block_on(async move {
10460            let req = json!({
10461                "jsonrpc": "2.0",
10462                "id": 1,
10463                "method": "tools/list",
10464            });
10465            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
10466            assert_eq!(status, StatusCode::OK);
10467            assert_eq!(body.get("jsonrpc").and_then(|v| v.as_str()), Some("2.0"));
10468            assert_eq!(body.get("id").and_then(|v| v.as_i64()), Some(1));
10469            let tools = body
10470                .pointer("/result/tools")
10471                .and_then(|v| v.as_array())
10472                .unwrap_or_else(|| panic!("missing /result/tools: {body}"));
10473            let mut names: Vec<String> = tools
10474                .iter()
10475                .filter_map(|t| t.get("name").and_then(|n| n.as_str()).map(String::from))
10476                .collect();
10477            names.sort();
10478            assert_eq!(
10479                names,
10480                vec![
10481                    "memory_contradictions".to_string(),
10482                    "memory_facts_about".to_string(),
10483                    "memory_forget".to_string(),
10484                    "memory_forget_document".to_string(),
10485                    "memory_ingest_document".to_string(),
10486                    "memory_inspect".to_string(),
10487                    "memory_inspect_cluster".to_string(),
10488                    "memory_inspect_document".to_string(),
10489                    "memory_list_documents".to_string(),
10490                    "memory_recall".to_string(),
10491                    "memory_remember".to_string(),
10492                    "memory_remember_batch".to_string(),
10493                    "memory_search_docs".to_string(),
10494                    "memory_themes".to_string(),
10495                ],
10496                "mcp_http: tools/list returned unexpected name set"
10497            );
10498        });
10499        h.shutdown(&runtime);
10500    }
10501
10502    /// `POST /mcp` with `tools/call` for `memory_remember` writes the
10503    /// episode and returns a confirmation string. Then a separate
10504    /// `GET /v1/graph/nodes` call (REST surface) sees the episode —
10505    /// proving one process is serving both surfaces against the same
10506    /// writer.
10507    #[test]
10508    fn mcp_http_remember_writes_episode_visible_via_graph_nodes() {
10509        let runtime = rt();
10510        let h = Harness::new(&runtime);
10511        let r = h.router.clone();
10512        runtime.block_on(async move {
10513            // 1. memory_remember via /mcp.
10514            let req = json!({
10515                "jsonrpc": "2.0",
10516                "id": 2,
10517                "method": "tools/call",
10518                "params": {
10519                    "name": "memory_remember",
10520                    "arguments": { "content": "mcp-http-cross-surface-smoke" },
10521                },
10522            });
10523            let (status, body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
10524            assert_eq!(status, StatusCode::OK);
10525            let result_text = body
10526                .pointer("/result/content/0/text")
10527                .and_then(|v| v.as_str())
10528                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
10529            assert!(
10530                result_text.starts_with("remembered "),
10531                "expected `remembered <id>`, got: {result_text}"
10532            );
10533
10534            // 2. Confirm via /v1/graph/nodes (REST). Same writer, same
10535            //    tenant — the cross-surface smoke that motivates v0.10.2.
10536            //    Episode nodes carry the content under `label` +
10537            //    `preview` (the v0.10.0 graph-nodes wire shape).
10538            let (status2, nodes_body) =
10539                call(r, "GET", "/v1/graph/nodes?kind=episode&limit=10", None).await;
10540            assert_eq!(status2, StatusCode::OK);
10541            let nodes = nodes_body
10542                .get("nodes")
10543                .and_then(|v| v.as_array())
10544                .unwrap_or_else(|| panic!("missing nodes: {nodes_body}"));
10545            assert!(
10546                nodes.iter().any(|n| {
10547                    let label_hit = n
10548                        .get("label")
10549                        .and_then(|c| c.as_str())
10550                        .is_some_and(|s| s.contains("mcp-http-cross-surface-smoke"));
10551                    let preview_hit = n
10552                        .get("preview")
10553                        .and_then(|c| c.as_str())
10554                        .is_some_and(|s| s.contains("mcp-http-cross-surface-smoke"));
10555                    label_hit || preview_hit
10556                }),
10557                "graph/nodes didn't surface the MCP-written episode: {nodes_body}"
10558            );
10559        });
10560        h.shutdown(&runtime);
10561    }
10562
10563    /// `POST /mcp` with `tools/call` for `memory_recall` returns the
10564    /// just-remembered episode. Smoke for the read path under the new
10565    /// transport.
10566    #[test]
10567    fn mcp_http_recall_returns_just_remembered_episode() {
10568        let runtime = rt();
10569        let h = Harness::new(&runtime);
10570        let r = h.router.clone();
10571        runtime.block_on(async move {
10572            // Remember first.
10573            let needle = "mcp-http-recall-needle-deadbeef";
10574            let req = json!({
10575                "jsonrpc": "2.0",
10576                "id": 3,
10577                "method": "tools/call",
10578                "params": {
10579                    "name": "memory_remember",
10580                    "arguments": { "content": needle },
10581                },
10582            });
10583            let (status, _body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
10584            assert_eq!(status, StatusCode::OK);
10585
10586            // Recall via the same /mcp transport.
10587            let req = json!({
10588                "jsonrpc": "2.0",
10589                "id": 4,
10590                "method": "tools/call",
10591                "params": {
10592                    "name": "memory_recall",
10593                    "arguments": { "query": needle, "limit": 5 },
10594                },
10595            });
10596            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
10597            assert_eq!(status, StatusCode::OK);
10598            let recall_text = body
10599                .pointer("/result/content/0/text")
10600                .and_then(|v| v.as_str())
10601                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
10602            assert!(
10603                recall_text.contains(needle),
10604                "recall didn't surface needle `{needle}`: {recall_text}"
10605            );
10606        });
10607        h.shutdown(&runtime);
10608    }
10609
10610    /// Malformed JSON body must surface as 400 (the wire envelope is
10611    /// invalid; the JSON-RPC layer never sees the request). The error
10612    /// body shape matches the rest of the API (`{error, status}`) so
10613    /// existing client error-handling paths keep working.
10614    #[test]
10615    fn mcp_http_malformed_body_returns_400() {
10616        let runtime = rt();
10617        let h = Harness::new(&runtime);
10618        let r = h.router.clone();
10619        runtime.block_on(async move {
10620            let req = Request::builder()
10621                .method("POST")
10622                .uri("/mcp")
10623                .header("content-type", "application/json")
10624                .body(Body::from("not-json-at-all".as_bytes()))
10625                .unwrap();
10626            let resp = r.oneshot(req).await.unwrap();
10627            assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
10628            let body_bytes =
10629                resp.into_body().collect().await.unwrap().to_bytes();
10630            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
10631            assert!(
10632                v.get("error")
10633                    .and_then(|e| e.as_str())
10634                    .map(|s| s.contains("invalid JSON-RPC request"))
10635                    .unwrap_or(false),
10636                "got: {v}"
10637            );
10638        });
10639        h.shutdown(&runtime);
10640    }
10641
10642    /// Wrong `jsonrpc` version must surface as 400. JSON-RPC 2.0 §4
10643    /// requires the literal string `"2.0"`.
10644    #[test]
10645    fn mcp_http_wrong_jsonrpc_version_returns_400() {
10646        let runtime = rt();
10647        let h = Harness::new(&runtime);
10648        let r = h.router.clone();
10649        runtime.block_on(async move {
10650            let req = json!({
10651                "jsonrpc": "1.0",
10652                "id": 1,
10653                "method": "tools/list",
10654            });
10655            let (status, _body) = call(r, "POST", "/mcp", Some(req)).await;
10656            assert_eq!(status, StatusCode::BAD_REQUEST);
10657        });
10658        h.shutdown(&runtime);
10659    }
10660
10661    /// Unknown method returns a JSON-RPC error envelope with code
10662    /// -32601 (METHOD_NOT_FOUND). HTTP status stays 200 because the
10663    /// envelope itself parsed fine — JSON-RPC errors are in-body.
10664    #[test]
10665    fn mcp_http_unknown_method_returns_in_body_method_not_found() {
10666        let runtime = rt();
10667        let h = Harness::new(&runtime);
10668        let r = h.router.clone();
10669        runtime.block_on(async move {
10670            let req = json!({
10671                "jsonrpc": "2.0",
10672                "id": 5,
10673                "method": "definitely/not/a/method",
10674            });
10675            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
10676            assert_eq!(status, StatusCode::OK);
10677            assert_eq!(
10678                body.pointer("/error/code").and_then(|v| v.as_i64()),
10679                Some(-32601),
10680                "expected JSON-RPC METHOD_NOT_FOUND (-32601), got: {body}"
10681            );
10682        });
10683        h.shutdown(&runtime);
10684    }
10685
10686    /// `POST /mcp` with the bearer-auth middleware enabled returns
10687    /// 401 without the token and 200 with the correct token.
10688    #[test]
10689    fn mcp_http_post_respects_bearer_auth() {
10690        let runtime = rt();
10691        let h = Harness::new_with_auth(&runtime, Some("secret-mcp-token".into()));
10692        let r = h.router.clone();
10693        runtime.block_on(async move {
10694            // No Authorization header → 401.
10695            let req = json!({
10696                "jsonrpc": "2.0",
10697                "id": 6,
10698                "method": "tools/list",
10699            });
10700            let (status, _body) = call(r.clone(), "POST", "/mcp", Some(req.clone())).await;
10701            assert_eq!(status, StatusCode::UNAUTHORIZED);
10702
10703            // With correct bearer → 200 + valid JSON-RPC reply.
10704            let (status, body) = call_with_auth(
10705                r,
10706                "POST",
10707                "/mcp",
10708                Some(req),
10709                Some("Bearer secret-mcp-token"),
10710            )
10711            .await;
10712            assert_eq!(status, StatusCode::OK);
10713            assert_eq!(
10714                body.pointer("/result/tools").and_then(|v| v.as_array()).map(|a| a.len()),
10715                Some(14),
10716                "authed tools/list should still return 14 tools: {body}"
10717            );
10718        });
10719        h.shutdown(&runtime);
10720    }
10721
10722    /// CORS preflight (`OPTIONS /mcp`) from a localhost origin returns
10723    /// 200 (tower-http's CorsLayer handles preflight implicitly) and
10724    /// the `access-control-allow-headers` carries both
10725    /// `x-solo-tenant` and `mcp-session-id`. Pins the v0.10.2
10726    /// allow-list addition.
10727    #[test]
10728    fn mcp_http_cors_preflight_allows_mcp_session_id_header() {
10729        let runtime = rt();
10730        let h = Harness::new(&runtime);
10731        let r = h.router.clone();
10732        runtime.block_on(async move {
10733            let req = Request::builder()
10734                .method("OPTIONS")
10735                .uri("/mcp")
10736                .header("origin", "http://localhost:5173")
10737                .header("access-control-request-method", "POST")
10738                .header(
10739                    "access-control-request-headers",
10740                    "content-type, mcp-session-id, x-solo-tenant, authorization",
10741                )
10742                .body(Body::empty())
10743                .unwrap();
10744            let resp = r.oneshot(req).await.unwrap();
10745            // tower-http CorsLayer returns 200 for permitted preflight.
10746            assert_eq!(resp.status(), StatusCode::OK);
10747            let allow_headers = resp
10748                .headers()
10749                .get("access-control-allow-headers")
10750                .and_then(|h| h.to_str().ok())
10751                .unwrap_or("")
10752                .to_lowercase();
10753            assert!(
10754                allow_headers.contains("mcp-session-id"),
10755                "preflight allow-headers must include mcp-session-id; got: {allow_headers}"
10756            );
10757            assert!(
10758                allow_headers.contains("x-solo-tenant"),
10759                "preflight allow-headers must still include x-solo-tenant; got: {allow_headers}"
10760            );
10761            // Allow-origin must echo the localhost origin (per the
10762            // permissive-localhost predicate).
10763            let allow_origin = resp
10764                .headers()
10765                .get("access-control-allow-origin")
10766                .and_then(|h| h.to_str().ok())
10767                .unwrap_or("");
10768            assert_eq!(allow_origin, "http://localhost:5173");
10769        });
10770        h.shutdown(&runtime);
10771    }
10772
10773    /// Notification messages (no `id`) return 202 Accepted with an
10774    /// empty body. Per JSON-RPC 2.0 §4.1 the server MUST NOT reply.
10775    #[test]
10776    fn mcp_http_notification_returns_202_accepted() {
10777        let runtime = rt();
10778        let h = Harness::new(&runtime);
10779        let r = h.router.clone();
10780        runtime.block_on(async move {
10781            let req = json!({
10782                "jsonrpc": "2.0",
10783                "method": "notifications/initialized",
10784                "params": {},
10785            });
10786            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
10787            assert_eq!(status, StatusCode::ACCEPTED);
10788            // Empty body — call() returns Value::Null when the body is
10789            // empty.
10790            assert_eq!(body, Value::Null);
10791        });
10792        h.shutdown(&runtime);
10793    }
10794
10795    // ---------------------------------------------------------------
10796    // v0.11.0 P1 — MCP `Mcp-Session-Id` middleware integration tests
10797    // ---------------------------------------------------------------
10798    //
10799    // These pin the per-request session contract: the POST handler
10800    // creates a fresh session id on a request that arrives without
10801    // the header (echoed back via `Mcp-Session-Id` response header);
10802    // a subsequent request carrying that same id continues using the
10803    // same session record; unknown or stale ids surface as 404 with
10804    // a re-init instruction. The lazy/background expiry semantics are
10805    // unit-tested in `mcp_session::tests`.
10806
10807    /// `POST /mcp` with `tools/list` (no `Mcp-Session-Id` header) must
10808    /// echo back a fresh session id in the response header. The
10809    /// session count in the store grows by exactly 1.
10810    #[test]
10811    fn mcp_post_without_session_id_creates_new_session() {
10812        let runtime = rt();
10813        let h = Harness::new(&runtime);
10814        let r = h.router.clone();
10815        runtime.block_on(async move {
10816            let req = Request::builder()
10817                .method("POST")
10818                .uri("/mcp")
10819                .header("content-type", "application/json")
10820                .body(Body::from(
10821                    serde_json::to_vec(&json!({
10822                        "jsonrpc": "2.0",
10823                        "id": 100,
10824                        "method": "tools/list",
10825                    }))
10826                    .unwrap(),
10827                ))
10828                .unwrap();
10829            let resp = r.oneshot(req).await.unwrap();
10830            assert_eq!(resp.status(), StatusCode::OK);
10831            let session_id = resp
10832                .headers()
10833                .get("mcp-session-id")
10834                .and_then(|v| v.to_str().ok())
10835                .map(|s| s.to_string())
10836                .unwrap_or_else(|| {
10837                    panic!(
10838                        "mcp-session-id response header missing on session-init POST: {:?}",
10839                        resp.headers()
10840                    )
10841                });
10842            assert!(
10843                !session_id.is_empty(),
10844                "session id must be a non-empty string"
10845            );
10846        });
10847        h.shutdown(&runtime);
10848    }
10849
10850    /// Two `POST /mcp` calls with the same session id in the request
10851    /// header must hit the same `SessionState` (i.e. no new entry
10852    /// gets allocated). The second response echoes the same id back.
10853    #[test]
10854    fn mcp_post_with_valid_session_id_continues_session() {
10855        let runtime = rt();
10856        let h = Harness::new(&runtime);
10857        let r = h.router.clone();
10858        runtime.block_on(async move {
10859            // First request: no header → fresh id.
10860            let req = Request::builder()
10861                .method("POST")
10862                .uri("/mcp")
10863                .header("content-type", "application/json")
10864                .body(Body::from(
10865                    serde_json::to_vec(&json!({
10866                        "jsonrpc": "2.0",
10867                        "id": 101,
10868                        "method": "tools/list",
10869                    }))
10870                    .unwrap(),
10871                ))
10872                .unwrap();
10873            let resp1 = r.clone().oneshot(req).await.unwrap();
10874            assert_eq!(resp1.status(), StatusCode::OK);
10875            let assigned_id = resp1
10876                .headers()
10877                .get("mcp-session-id")
10878                .and_then(|v| v.to_str().ok())
10879                .map(|s| s.to_string())
10880                .expect("first response must carry mcp-session-id");
10881
10882            // Second request: carry the same id forward.
10883            let req2 = Request::builder()
10884                .method("POST")
10885                .uri("/mcp")
10886                .header("content-type", "application/json")
10887                .header("mcp-session-id", &assigned_id)
10888                .body(Body::from(
10889                    serde_json::to_vec(&json!({
10890                        "jsonrpc": "2.0",
10891                        "id": 102,
10892                        "method": "tools/list",
10893                    }))
10894                    .unwrap(),
10895                ))
10896                .unwrap();
10897            let resp2 = r.oneshot(req2).await.unwrap();
10898            assert_eq!(resp2.status(), StatusCode::OK);
10899            let echoed = resp2
10900                .headers()
10901                .get("mcp-session-id")
10902                .and_then(|v| v.to_str().ok())
10903                .map(|s| s.to_string())
10904                .expect("continuation response must echo mcp-session-id");
10905            assert_eq!(
10906                echoed, assigned_id,
10907                "second response must echo the same session id"
10908            );
10909        });
10910        h.shutdown(&runtime);
10911    }
10912
10913    /// A `POST /mcp` carrying a random / never-assigned `Mcp-Session-Id`
10914    /// must surface as 404 with the `session_expired` error
10915    /// discriminator and the re-initialize instruction in the body.
10916    #[test]
10917    fn mcp_post_with_unknown_session_id_returns_404() {
10918        let runtime = rt();
10919        let h = Harness::new(&runtime);
10920        let r = h.router.clone();
10921        runtime.block_on(async move {
10922            let req = Request::builder()
10923                .method("POST")
10924                .uri("/mcp")
10925                .header("content-type", "application/json")
10926                // A plausibly-shaped id the server never assigned.
10927                .header("mcp-session-id", "11111111-2222-3333-4444-555555555555")
10928                .body(Body::from(
10929                    serde_json::to_vec(&json!({
10930                        "jsonrpc": "2.0",
10931                        "id": 103,
10932                        "method": "tools/list",
10933                    }))
10934                    .unwrap(),
10935                ))
10936                .unwrap();
10937            let resp = r.oneshot(req).await.unwrap();
10938            assert_eq!(resp.status(), StatusCode::NOT_FOUND);
10939            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
10940            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
10941            assert_eq!(
10942                v.get("error").and_then(|e| e.as_str()),
10943                Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
10944                "404 body must carry the session_expired discriminator: {v}"
10945            );
10946            assert!(
10947                v.get("retry")
10948                    .and_then(|e| e.as_str())
10949                    .map(|s| s == "re-initialize")
10950                    .unwrap_or(false),
10951                "404 body must instruct re-initialize: {v}"
10952            );
10953        });
10954        h.shutdown(&runtime);
10955    }
10956
10957    /// A `POST /mcp` carrying a `Mcp-Session-Id` that WAS assigned but
10958    /// has since been expired (we evict it directly from the store to
10959    /// simulate the TTL sweep) must surface the same 404 +
10960    /// `session_expired` discriminator. Distinct from the
10961    /// "unknown id" test above — same wire response, different cause.
10962    #[test]
10963    fn mcp_post_with_expired_session_id_returns_404() {
10964        let runtime = rt();
10965        let h = Harness::new(&runtime);
10966        let r = h.router.clone();
10967        let store = h.mcp_sessions.clone();
10968        runtime.block_on(async move {
10969            // First request to allocate a session id.
10970            let req1 = Request::builder()
10971                .method("POST")
10972                .uri("/mcp")
10973                .header("content-type", "application/json")
10974                .body(Body::from(
10975                    serde_json::to_vec(&json!({
10976                        "jsonrpc": "2.0",
10977                        "id": 104,
10978                        "method": "tools/list",
10979                    }))
10980                    .unwrap(),
10981                ))
10982                .unwrap();
10983            let resp1 = r.clone().oneshot(req1).await.unwrap();
10984            let assigned_id_str = resp1
10985                .headers()
10986                .get("mcp-session-id")
10987                .and_then(|v| v.to_str().ok())
10988                .map(|s| s.to_string())
10989                .expect("first response must carry mcp-session-id");
10990
10991            // Force-evict the session directly via the harness's
10992            // SessionStore clone. This is the moral equivalent of the
10993            // background sweep evicting an entry past TTL — same
10994            // observable from the wire (the handler's middleware sees
10995            // `SessionStore::get` return `None`). Driving the real
10996            // 30-min inactivity clock is not test-friendly.
10997            let parsed = crate::mcp_session::SessionId::parse(&assigned_id_str)
10998                .expect("just-assigned id must parse");
10999            assert!(
11000                store.delete(&parsed),
11001                "stored session must be deletable"
11002            );
11003
11004            // Now the id is "stale" (no longer in the store) — same
11005            // observable as a TTL eviction.
11006            let req2 = Request::builder()
11007                .method("POST")
11008                .uri("/mcp")
11009                .header("content-type", "application/json")
11010                .header("mcp-session-id", &assigned_id_str)
11011                .body(Body::from(
11012                    serde_json::to_vec(&json!({
11013                        "jsonrpc": "2.0",
11014                        "id": 105,
11015                        "method": "tools/list",
11016                    }))
11017                    .unwrap(),
11018                ))
11019                .unwrap();
11020            let resp2 = r.oneshot(req2).await.unwrap();
11021            assert_eq!(resp2.status(), StatusCode::NOT_FOUND);
11022            let body_bytes = resp2.into_body().collect().await.unwrap().to_bytes();
11023            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
11024            assert_eq!(
11025                v.get("error").and_then(|e| e.as_str()),
11026                Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
11027                "expired-session 404 body must carry session_expired: {v}"
11028            );
11029        });
11030        h.shutdown(&runtime);
11031    }
11032
11033    /// v0.11.0 P2: `GET /mcp` REQUIRES an `Mcp-Session-Id` header. The
11034    /// GET stream is "attach to an existing session's notification
11035    /// channel" — there's no session-init story over GET (POST owns
11036    /// session creation). A GET without the header must return 404
11037    /// with the `session_expired` discriminator + `re-initialize`
11038    /// instruction, mirroring the unknown-id 404 wire shape so clients
11039    /// have a single recovery code path.
11040    ///
11041    /// Diverges deliberately from v0.11.0 P1's behaviour (which
11042    /// auto-created on GET) — see `docs/dev-log/0134-v0.11.0-p2-impl.md`
11043    /// for the rationale.
11044    #[test]
11045    fn mcp_get_without_session_id_returns_404() {
11046        let runtime = rt();
11047        let h = Harness::new(&runtime);
11048        let r = h.router.clone();
11049        runtime.block_on(async move {
11050            let req = Request::builder()
11051                .method("GET")
11052                .uri("/mcp")
11053                .header("accept", "text/event-stream")
11054                .body(Body::empty())
11055                .unwrap();
11056            let resp = r.oneshot(req).await.unwrap();
11057            assert_eq!(resp.status(), StatusCode::NOT_FOUND);
11058            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
11059            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
11060            assert_eq!(
11061                v.get("error").and_then(|e| e.as_str()),
11062                Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
11063                "GET /mcp without session id must carry session_expired: {v}"
11064            );
11065            assert_eq!(
11066                v.get("retry").and_then(|e| e.as_str()),
11067                Some("re-initialize"),
11068            );
11069        });
11070        h.shutdown(&runtime);
11071    }
11072
11073    // ---------------------------------------------------------------
11074    // v0.11.0 P2 — resumable /mcp GET stream + Last-Event-ID
11075    // ---------------------------------------------------------------
11076    //
11077    // These pin the v0.11.0 P2 wire contract for the resumable GET
11078    // stream: an `Mcp-Session-Id`-bound subscriber sees `event: init`
11079    // first, then any buffered replay events past `Last-Event-ID`,
11080    // then live broadcast events as they're published. The unit-test
11081    // half of the contract (publish_event monotonic + buffer cap) lives
11082    // in `crate::mcp_session::tests`.
11083
11084    /// Open the `/mcp` GET stream for one session id. Returns
11085    /// `(status, body)` where the body is the SSE frame stream.
11086    async fn open_mcp_get_stream(
11087        router: axum::Router,
11088        session_id: &str,
11089        last_event_id: Option<&str>,
11090    ) -> (StatusCode, axum::body::Body, axum::http::HeaderMap) {
11091        let mut builder = Request::builder()
11092            .method("GET")
11093            .uri("/mcp")
11094            .header("accept", "text/event-stream")
11095            .header(crate::mcp_session::MCP_SESSION_ID_HEADER, session_id);
11096        if let Some(leid) = last_event_id {
11097            builder = builder.header(crate::mcp_session::MCP_LAST_EVENT_ID_HEADER, leid);
11098        }
11099        let req = builder
11100            .header("content-length", "0")
11101            .body(Body::empty())
11102            .unwrap();
11103        let resp = router.oneshot(req).await.expect("oneshot");
11104        let status = resp.status();
11105        let headers = resp.headers().clone();
11106        let body = resp.into_body();
11107        (status, body, headers)
11108    }
11109
11110    /// Allocate one session via a POST so a follow-up GET can attach.
11111    /// Returns the assigned session id from the response header.
11112    async fn allocate_mcp_session(router: axum::Router) -> String {
11113        let req = Request::builder()
11114            .method("POST")
11115            .uri("/mcp")
11116            .header("content-type", "application/json")
11117            .body(Body::from(
11118                serde_json::to_vec(&json!({
11119                    "jsonrpc": "2.0",
11120                    "id": 1,
11121                    "method": "tools/list",
11122                }))
11123                .unwrap(),
11124            ))
11125            .unwrap();
11126        let resp = router.oneshot(req).await.expect("oneshot");
11127        assert_eq!(resp.status(), StatusCode::OK, "POST must allocate session");
11128        resp.headers()
11129            .get(crate::mcp_session::MCP_SESSION_ID_HEADER)
11130            .and_then(|v| v.to_str().ok())
11131            .map(|s| s.to_string())
11132            .expect("POST must echo Mcp-Session-Id")
11133    }
11134
11135    /// Look up the in-store `Arc<SessionState>` so a test can publish
11136    /// events directly onto the same record the GET handler subscribed
11137    /// to. Takes the [`SessionStore`] directly so callers can clone it
11138    /// out of the harness before moving the harness into the async
11139    /// block.
11140    fn session_state_for_test(
11141        store: &crate::mcp_session::SessionStore,
11142        session_id: &str,
11143    ) -> std::sync::Arc<crate::mcp_session::SessionState> {
11144        let parsed = crate::mcp_session::SessionId::parse(session_id)
11145            .expect("test session id must parse");
11146        store
11147            .get(&parsed)
11148            .expect("session must still be in store")
11149    }
11150
11151    /// GET `/mcp` against a session that's been force-evicted (TTL
11152    /// sweep) returns 404 with the `session_expired` discriminator —
11153    /// same wire shape as POST.
11154    #[test]
11155    fn mcp_get_with_expired_session_id_returns_404() {
11156        let runtime = rt();
11157        let h = Harness::new(&runtime);
11158        let r = h.router.clone();
11159        let store = h.mcp_sessions.clone();
11160        runtime.block_on(async move {
11161            let session_id = allocate_mcp_session(r.clone()).await;
11162            // Force-evict via the harness store handle.
11163            let parsed = crate::mcp_session::SessionId::parse(&session_id).unwrap();
11164            assert!(store.delete(&parsed));
11165            // Now GET against the stale id.
11166            let req = Request::builder()
11167                .method("GET")
11168                .uri("/mcp")
11169                .header("accept", "text/event-stream")
11170                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
11171                .body(Body::empty())
11172                .unwrap();
11173            let resp = r.oneshot(req).await.unwrap();
11174            assert_eq!(resp.status(), StatusCode::NOT_FOUND);
11175            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
11176            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
11177            assert_eq!(
11178                v.get("error").and_then(|e| e.as_str()),
11179                Some(crate::mcp_session::MCP_SESSION_EXPIRED_ERROR),
11180            );
11181        });
11182        h.shutdown(&runtime);
11183    }
11184
11185    /// Happy-path subscribe: open `/mcp` with a freshly-allocated
11186    /// session id, expect `event: init` as the first frame with the
11187    /// session id echoed in both the response header AND the init
11188    /// payload.
11189    #[test]
11190    fn mcp_get_with_valid_session_id_subscribes() {
11191        let runtime = rt();
11192        let h = Harness::new(&runtime);
11193        let r = h.router.clone();
11194        runtime.block_on(async move {
11195            let session_id = allocate_mcp_session(r.clone()).await;
11196            let (status, mut body, headers) =
11197                open_mcp_get_stream(r, &session_id, None).await;
11198            assert_eq!(status, StatusCode::OK);
11199            // Response header echoes the session id.
11200            let echoed = headers
11201                .get(crate::mcp_session::MCP_SESSION_ID_HEADER)
11202                .and_then(|v| v.to_str().ok())
11203                .unwrap();
11204            assert_eq!(echoed, session_id);
11205            // First frame is the init event.
11206            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
11207                .await
11208                .expect("init event must arrive within 2s");
11209            assert_eq!(ev.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
11210            assert_eq!(ev.data["connected"].as_bool(), Some(true));
11211            assert_eq!(ev.data["session_id"].as_str(), Some(session_id.as_str()));
11212            // Init carries id "0" (reserved sentinel — the first real
11213            // publish_event allocates id 1).
11214            assert_eq!(ev.id.as_deref(), Some("0"));
11215        });
11216        h.shutdown(&runtime);
11217    }
11218
11219    /// Publish 5 events on the session, reconnect with
11220    /// `Last-Event-ID: 2`, observe `init` then events 3, 4, 5 (in
11221    /// order). Pins the resume-from-cursor contract.
11222    #[test]
11223    fn mcp_get_resumes_from_last_event_id() {
11224        let runtime = rt();
11225        let h = Harness::new(&runtime);
11226        let r = h.router.clone();
11227        let store = h.mcp_sessions.clone();
11228        runtime.block_on(async move {
11229            let session_id = allocate_mcp_session(r.clone()).await;
11230            let state = session_state_for_test(&store, &session_id);
11231            for i in 1..=5 {
11232                state.publish_event(
11233                    crate::mcp_session::McpEventKind::Message,
11234                    json!({"n": i}),
11235                );
11236            }
11237            let (status, mut body, _) =
11238                open_mcp_get_stream(r, &session_id, Some("2")).await;
11239            assert_eq!(status, StatusCode::OK);
11240            // First frame is init.
11241            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
11242                .await
11243                .unwrap();
11244            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
11245            // Then events 3, 4, 5 in order.
11246            for expected_id in 3..=5 {
11247                let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
11248                    .await
11249                    .expect("replay event must arrive within 2s");
11250                assert_eq!(
11251                    ev.event,
11252                    crate::mcp_session::MCP_STREAM_EVENT_MESSAGE_NAME,
11253                    "expected replay of message event id {expected_id}, got {ev:?}",
11254                );
11255                assert_eq!(ev.id.as_deref(), Some(expected_id.to_string().as_str()));
11256                assert_eq!(ev.data["n"].as_u64(), Some(expected_id));
11257            }
11258        });
11259        h.shutdown(&runtime);
11260    }
11261
11262    /// Publish past the broadcast buffer's capacity (300 events) then
11263    /// reconnect with `Last-Event-ID: 0` (the sentinel for "I just
11264    /// joined and missed everything since event 1"). Observe `event:
11265    /// init`, then a synthetic `event: lagged` describing the gap,
11266    /// then the tail of the buffer.
11267    #[test]
11268    fn mcp_get_emits_lagged_when_last_event_id_too_old() {
11269        let runtime = rt();
11270        let h = Harness::new(&runtime);
11271        let r = h.router.clone();
11272        let store = h.mcp_sessions.clone();
11273        runtime.block_on(async move {
11274            let session_id = allocate_mcp_session(r.clone()).await;
11275            let state = session_state_for_test(&store, &session_id);
11276            // Publish 300 events — buffer cap is 256, so events 1..=44
11277            // get evicted (oldest retained id = 45).
11278            for _ in 0..300 {
11279                state.publish_event(
11280                    crate::mcp_session::McpEventKind::Message,
11281                    json!({}),
11282                );
11283            }
11284            // Last-Event-ID: 1 — claim we've only seen event 1, but
11285            // event 2 (and 3..=44) are gone from the buffer.
11286            let (status, mut body, _) =
11287                open_mcp_get_stream(r, &session_id, Some("1")).await;
11288            assert_eq!(status, StatusCode::OK);
11289            // First frame: init.
11290            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
11291                .await
11292                .unwrap();
11293            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
11294            // Second frame: lagged (synthetic) with id 0.
11295            let lagged = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
11296                .await
11297                .expect("lagged event must arrive within 2s");
11298            assert_eq!(
11299                lagged.event,
11300                crate::mcp_session::MCP_STREAM_EVENT_LAGGED_NAME,
11301                "expected `event: lagged` after Last-Event-ID before buffer",
11302            );
11303            assert_eq!(lagged.id.as_deref(), Some("0"));
11304            assert!(
11305                lagged.data["dropped"].as_u64().unwrap_or(0) > 0,
11306                "lagged event must carry a non-zero `dropped` count: {:?}",
11307                lagged.data,
11308            );
11309        });
11310        h.shutdown(&runtime);
11311    }
11312
11313    /// CORS preflight (OPTIONS) with `Access-Control-Request-Headers:
11314    /// last-event-id` must succeed and the `last-event-id` header must
11315    /// appear in `Access-Control-Allow-Headers`. Without this, a
11316    /// browser reconnecting an SSE stream with `Last-Event-ID:` fails
11317    /// the preflight before the actual GET lands.
11318    #[test]
11319    fn cors_preflight_allows_last_event_id_header() {
11320        let runtime = rt();
11321        let h = Harness::new(&runtime);
11322        let r = h.router.clone();
11323        runtime.block_on(async move {
11324            let req = Request::builder()
11325                .method("OPTIONS")
11326                .uri("/mcp")
11327                .header("origin", "http://localhost:5173")
11328                .header("access-control-request-method", "GET")
11329                .header(
11330                    "access-control-request-headers",
11331                    "last-event-id,mcp-session-id",
11332                )
11333                .body(Body::empty())
11334                .unwrap();
11335            let resp = r.oneshot(req).await.unwrap();
11336            assert!(
11337                resp.status().is_success() || resp.status() == StatusCode::NO_CONTENT,
11338                "preflight must succeed, got: {}",
11339                resp.status(),
11340            );
11341            let allow = resp
11342                .headers()
11343                .get("access-control-allow-headers")
11344                .and_then(|h| h.to_str().ok())
11345                .map(|s| s.to_ascii_lowercase())
11346                .unwrap_or_default();
11347            assert!(
11348                allow.contains("last-event-id"),
11349                "preflight must allow `last-event-id`; allow-headers = {allow:?}",
11350            );
11351            assert!(
11352                allow.contains("mcp-session-id"),
11353                "preflight must allow `mcp-session-id` too; allow-headers = {allow:?}",
11354            );
11355        });
11356        h.shutdown(&runtime);
11357    }
11358
11359    /// Heartbeat cadence: with a short interval, the stream emits a
11360    /// typed `event: heartbeat` after the init event. The production
11361    /// cadence is [`MCP_STREAM_HEARTBEAT_SECS`] (30s); the test
11362    /// exercises `build_mcp_session_stream` directly with a 1-second
11363    /// interval so we don't burn 30s of CI wall time.
11364    #[test]
11365    fn mcp_get_heartbeats_after_init() {
11366        let runtime = rt();
11367        let h = Harness::new(&runtime);
11368        runtime.block_on(async move {
11369            let state = std::sync::Arc::new(crate::mcp_session::SessionState::new(
11370                solo_core::TenantId::default_tenant(),
11371                None,
11372            ));
11373            let session_id = crate::mcp_session::SessionId::new();
11374            let stream = build_mcp_session_stream(
11375                state,
11376                session_id.clone(),
11377                "default".to_string(),
11378                0,
11379                1, // 1-second heartbeat for the test
11380            );
11381            // Pull frames off the stream. Should see init then
11382            // (with no live events) a heartbeat within ~1.5s.
11383            use futures::StreamExt;
11384            let mut stream = std::pin::pin!(stream);
11385            let init_ev = tokio::time::timeout(std::time::Duration::from_secs(2), stream.next())
11386                .await
11387                .expect("init must arrive within 2s")
11388                .expect("stream must yield init");
11389            // Rendering the Event is opaque; we don't introspect it
11390            // here — the wire-format integration test
11391            // `mcp_get_with_valid_session_id_subscribes` covers that.
11392            // This test pins that a SECOND frame lands within the
11393            // heartbeat window. Drop the init frame.
11394            drop(init_ev);
11395            let hb = tokio::time::timeout(std::time::Duration::from_secs(3), stream.next())
11396                .await
11397                .expect("heartbeat must arrive within ~3s")
11398                .expect("stream must yield heartbeat");
11399            // Same opacity — we observe presence, not content. The
11400            // integration-level test
11401            // `mcp_get_with_valid_session_id_subscribes` covers wire
11402            // content.
11403            drop(hb);
11404        });
11405        h.shutdown(&runtime);
11406    }
11407
11408    /// v0.11.0 P3: `memory_ingest_document` emits the first two phase
11409    /// events (parsed, chunked) BEFORE the writer-actor call, so they
11410    /// fire even when the underlying writer has no embedder configured.
11411    /// This pins the upstream half of the 4-phase ingest progress
11412    /// taxonomy without needing a fully-equipped writer harness — the
11413    /// post-writer phases (embedded, inserted) are pinned indirectly
11414    /// by the `MCP_NOTIFICATION_PROGRESS_METHOD` grep-ability and by
11415    /// the dispatch_tests-level progress-emission tests for the other
11416    /// two long-running tools (search_docs / remember_batch).
11417    #[test]
11418    fn mcp_http_ingest_document_emits_parsed_and_chunked_progress_events() {
11419        let runtime = rt();
11420        let h = Harness::new(&runtime);
11421        let r = h.router.clone();
11422        let store = h.mcp_sessions.clone();
11423        runtime.block_on(async move {
11424            let session_id = allocate_mcp_session(r.clone()).await;
11425            let state = session_state_for_test(&store, &session_id);
11426            let mut rx = state.subscribe_events();
11427            // Write a `.txt` so the parser doesn't reject before the
11428            // first progress event fires — handle_ingest_document
11429            // emits `parsed` BEFORE the writer call but AFTER the
11430            // empty-path guard; both pre-writer phases fire regardless
11431            // of writer-side embedder configuration.
11432            let tmpdir = tempfile::TempDir::new().unwrap();
11433            let tmpfile = tmpdir.path().join("ingest-progress.txt");
11434            std::fs::write(&tmpfile, b"hello world progress test").unwrap();
11435            let body = json!({
11436                "jsonrpc": "2.0",
11437                "id": 2,
11438                "method": "tools/call",
11439                "params": {
11440                    "name": "memory_ingest_document",
11441                    "arguments": { "path": tmpfile.to_string_lossy() },
11442                    "_meta": { "progressToken": "ingest-tok" },
11443                },
11444            });
11445            let req = Request::builder()
11446                .method("POST")
11447                .uri("/mcp")
11448                .header("content-type", "application/json")
11449                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
11450                .body(Body::from(serde_json::to_vec(&body).unwrap()))
11451                .unwrap();
11452            let resp = r.clone().oneshot(req).await.expect("oneshot");
11453            assert_eq!(resp.status(), StatusCode::OK);
11454            let _ = resp.into_body().collect().await.unwrap().to_bytes();
11455            let mut events = Vec::new();
11456            while let Ok(ev) = rx.try_recv() {
11457                events.push(ev);
11458            }
11459            // We expect AT LEAST the 2 pre-writer phase events. In a
11460            // fully-equipped harness the writer would succeed and the
11461            // post-writer phases (embedded + inserted) would also fire;
11462            // here we pin the pre-writer half + the spec envelope shape.
11463            assert!(
11464                events.len() >= 2,
11465                "expected at least 2 progress events (parsed + chunked), got {}: {events:?}",
11466                events.len()
11467            );
11468            // Phase 1 = "parsed"; phase 2 = "chunked"; both carry
11469            // total=4 and progressToken="ingest-tok".
11470            assert_eq!(events[0].data["params"]["progress"], json!(1));
11471            assert_eq!(events[0].data["params"]["message"], json!("parsed"));
11472            assert_eq!(events[1].data["params"]["progress"], json!(2));
11473            assert_eq!(events[1].data["params"]["message"], json!("chunked"));
11474            for ev in &events {
11475                assert_eq!(
11476                    ev.event,
11477                    crate::mcp_session::McpEventKind::Progress,
11478                );
11479                assert_eq!(
11480                    ev.data["method"],
11481                    json!(crate::mcp_progress::MCP_NOTIFICATION_PROGRESS_METHOD)
11482                );
11483                assert_eq!(ev.data["params"]["progressToken"], json!("ingest-tok"));
11484                assert_eq!(ev.data["params"]["total"], json!(4));
11485            }
11486        });
11487        h.shutdown(&runtime);
11488    }
11489
11490    /// v0.11.0 P3: end-to-end progress event roundtrip — POST a
11491    /// `tools/call` carrying `_meta.progressToken`, then reconnect via
11492    /// `GET /mcp` with a `Last-Event-ID` that triggers buffer replay.
11493    /// Confirms the wire path:
11494    /// `tools/call params._meta.progressToken` → ProgressReporter →
11495    /// SessionState.publish_event → replay buffer → GET SSE replay
11496    /// drain → client receives spec-shape envelope.
11497    ///
11498    /// `Last-Event-ID: 0` is treated as "brand new subscriber, no
11499    /// replay" per the v0.11.0 P2 contract — so we drive a non-zero
11500    /// `Last-Event-ID` smaller than every event id by first force-
11501    /// publishing one synthetic seed event (id 1), then issuing the
11502    /// real `tools/call` (which publishes 3 progress events with
11503    /// ids 2..=4), then GET with `Last-Event-ID: 1` to replay
11504    /// exactly the progress trio.
11505    #[test]
11506    fn mcp_http_progress_event_subscribers_receive_via_get_mcp_stream() {
11507        let runtime = rt();
11508        let h = Harness::new(&runtime);
11509        let r = h.router.clone();
11510        let store = h.mcp_sessions.clone();
11511        runtime.block_on(async move {
11512            // 1. Allocate a session via an initial POST.
11513            let session_id = allocate_mcp_session(r.clone()).await;
11514            // 2. Seed one synthetic event (id 1) so the buffer is
11515            //    non-empty before the real progress events. The GET
11516            //    handler's replay path only fires when last_event_id
11517            //    > 0; we'll pass Last-Event-ID: 1 to skip the seed and
11518            //    replay the progress events that follow.
11519            let state = session_state_for_test(&store, &session_id);
11520            state.publish_event(
11521                crate::mcp_session::McpEventKind::Message,
11522                json!({"seed": true}),
11523            );
11524            // 3. POST a `memory_search_docs` tools/call carrying
11525            //    `_meta.progressToken` (well above the top_k threshold
11526            //    so progress IS emitted). The query returns empty hits
11527            //    in the harness — what matters here is that the 3
11528            //    progress events fire as side effects of the call.
11529            let body = json!({
11530                "jsonrpc": "2.0",
11531                "id": 2,
11532                "method": "tools/call",
11533                "params": {
11534                    "name": "memory_search_docs",
11535                    "arguments": { "query": "anything", "limit": 150 },
11536                    "_meta": { "progressToken": "progress-roundtrip" },
11537                },
11538            });
11539            let req = Request::builder()
11540                .method("POST")
11541                .uri("/mcp")
11542                .header("content-type", "application/json")
11543                .header(crate::mcp_session::MCP_SESSION_ID_HEADER, &session_id)
11544                .body(Body::from(serde_json::to_vec(&body).unwrap()))
11545                .unwrap();
11546            let resp = r.clone().oneshot(req).await.expect("oneshot");
11547            assert_eq!(resp.status(), StatusCode::OK);
11548            // Drain the POST response so the future completes before
11549            // we open the GET stream.
11550            let _ = resp.into_body().collect().await.unwrap().to_bytes();
11551            // 4. Open the GET stream with Last-Event-ID: 1 — replay
11552            //    every event past the seed.
11553            let (status, mut stream_body, _) =
11554                open_mcp_get_stream(r, &session_id, Some("1")).await;
11555            assert_eq!(status, StatusCode::OK);
11556            // First frame: init (id 0, reserved sentinel).
11557            let init = read_one_sse_event(&mut stream_body, std::time::Duration::from_secs(2))
11558                .await
11559                .expect("init must arrive within 2s");
11560            assert_eq!(init.event, crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME);
11561            // Then 3 progress events (the search_docs handler emits 3
11562            // when top_k > 100). Collect them and assert the spec
11563            // envelope shape.
11564            for expected_progress in 1u64..=3u64 {
11565                let ev = read_one_sse_event(&mut stream_body, std::time::Duration::from_secs(2))
11566                    .await
11567                    .expect("progress event must arrive within 2s");
11568                assert_eq!(
11569                    ev.event,
11570                    crate::mcp_session::MCP_STREAM_EVENT_PROGRESS_NAME,
11571                    "expected progress event #{expected_progress}, got {ev:?}",
11572                );
11573                // Spec-shape envelope: jsonrpc + method + params{progressToken, progress, total}.
11574                assert_eq!(ev.data["jsonrpc"], json!("2.0"));
11575                assert_eq!(
11576                    ev.data["method"],
11577                    json!(crate::mcp_progress::MCP_NOTIFICATION_PROGRESS_METHOD)
11578                );
11579                assert_eq!(
11580                    ev.data["params"]["progressToken"],
11581                    json!("progress-roundtrip")
11582                );
11583                assert_eq!(
11584                    ev.data["params"]["progress"],
11585                    json!(expected_progress)
11586                );
11587                assert_eq!(ev.data["params"]["total"], json!(3));
11588            }
11589        });
11590        h.shutdown(&runtime);
11591    }
11592
11593    /// `initialize` returns the `{name: "solo", version: <crate
11594    /// version>}` server-info pinned by the stdio invariant test
11595    /// `server_info_identity_is_solo_not_rmcp_or_solo_api`. Sanity
11596    /// check that the v0.10.2 HTTP transport doesn't drift away from
11597    /// the stdio identity.
11598    #[test]
11599    fn mcp_http_initialize_returns_solo_server_info() {
11600        let runtime = rt();
11601        let h = Harness::new(&runtime);
11602        let r = h.router.clone();
11603        runtime.block_on(async move {
11604            let req = json!({
11605                "jsonrpc": "2.0",
11606                "id": 7,
11607                "method": "initialize",
11608                "params": {
11609                    "protocolVersion": "2024-11-05",
11610                    "capabilities": {},
11611                    "clientInfo": { "name": "solo-http-test", "version": "0.0.0" },
11612                },
11613            });
11614            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
11615            assert_eq!(status, StatusCode::OK);
11616            assert_eq!(
11617                body.pointer("/result/serverInfo/name").and_then(|v| v.as_str()),
11618                Some("solo"),
11619                "serverInfo.name must be `solo`, not `solo-api` or `rmcp`; got: {body}"
11620            );
11621            // `protocolVersion` is the static value the dispatcher
11622            // emits today (2024-11-05). The stdio loop emits rmcp's
11623            // own default — we cross-check those two stay aligned in
11624            // the v0.10.3+ session work; for v0.10.2 we just pin the
11625            // HTTP-side value.
11626            assert_eq!(
11627                body.pointer("/result/protocolVersion").and_then(|v| v.as_str()),
11628                Some("2024-11-05"),
11629            );
11630        });
11631        h.shutdown(&runtime);
11632    }
11633
11634    // ----------------------------------------------------------------
11635    // v0.11.0 P4 — notifications/message bridge from InvalidateEvent
11636    // ----------------------------------------------------------------
11637
11638    /// v0.11.0 P4: a fresh POST /mcp (no session id) causes the per-
11639    /// session invalidate bridge to be spawned. Pin by firing an
11640    /// invalidate on the harness's broadcast sender AFTER the session
11641    /// is allocated and asserting the session's own event channel
11642    /// receives an MCP `notifications/message` event.
11643    #[test]
11644    fn session_subscribes_to_tenant_invalidate_on_creation() {
11645        let runtime = rt();
11646        let h = Harness::new(&runtime);
11647        let r = h.router.clone();
11648        let store = h.mcp_sessions.clone();
11649        let sender = h.invalidate_sender();
11650        runtime.block_on(async move {
11651            // Allocate session — POST handler spawns the bridge.
11652            let session_id = allocate_mcp_session(r).await;
11653            let state = session_state_for_test(&store, &session_id);
11654            let mut rx = state.subscribe_events();
11655            // Fire one invalidate on the tenant's broadcast.
11656            sender
11657                .send(InvalidateEvent {
11658                    reason: "memory.remember".to_string(),
11659                    tenant_id: "default".to_string(),
11660                    ts_ms: 1_715_625_600_000,
11661                    kind: "episode".to_string(),
11662                })
11663                .expect("at least one subscriber (the bridge)");
11664            // Bridge forwards it to the session as an MCP Message.
11665            let received = tokio::time::timeout(
11666                std::time::Duration::from_secs(2),
11667                rx.recv(),
11668            )
11669            .await
11670            .expect("bridge must forward invalidate within 2s")
11671            .expect("session receiver must observe published event");
11672            assert_eq!(received.event, crate::mcp_session::McpEventKind::Message);
11673            assert_eq!(
11674                received.data["method"].as_str(),
11675                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_METHOD),
11676            );
11677        });
11678        h.shutdown(&runtime);
11679    }
11680
11681    /// v0.11.0 P4: pin the exact envelope shape — `jsonrpc=2.0`,
11682    /// `method=notifications/message`, `params.{level,logger,data,details}`.
11683    /// One full round-trip through the bridge so a future refactor
11684    /// that changes the wire format trips this test.
11685    #[test]
11686    fn invalidate_event_translates_to_mcp_notifications_message() {
11687        let runtime = rt();
11688        let h = Harness::new(&runtime);
11689        let r = h.router.clone();
11690        let store = h.mcp_sessions.clone();
11691        let sender = h.invalidate_sender();
11692        runtime.block_on(async move {
11693            let session_id = allocate_mcp_session(r).await;
11694            let state = session_state_for_test(&store, &session_id);
11695            let mut rx = state.subscribe_events();
11696            sender
11697                .send(InvalidateEvent {
11698                    reason: "memory.ingest_document".to_string(),
11699                    tenant_id: "default".to_string(),
11700                    ts_ms: 1_715_625_999_999,
11701                    kind: "document".to_string(),
11702                })
11703                .expect("at least one subscriber");
11704            let received = tokio::time::timeout(
11705                std::time::Duration::from_secs(2),
11706                rx.recv(),
11707            )
11708            .await
11709            .expect("forward within 2s")
11710            .expect("session must receive event");
11711            // Envelope shape.
11712            assert_eq!(received.data["jsonrpc"].as_str(), Some("2.0"));
11713            assert_eq!(
11714                received.data["method"].as_str(),
11715                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_METHOD),
11716            );
11717            let params = &received.data["params"];
11718            assert_eq!(
11719                params["level"].as_str(),
11720                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_LEVEL),
11721            );
11722            assert_eq!(
11723                params["logger"].as_str(),
11724                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_LOGGER),
11725            );
11726            // document kind maps to documents_updated.
11727            assert_eq!(
11728                params["data"].as_str(),
11729                Some(crate::mcp_notify::MCP_NOTIFICATION_DATA_DOCUMENTS_UPDATED),
11730            );
11731            // details preserves the structured original event.
11732            assert_eq!(
11733                params["details"]["reason"].as_str(),
11734                Some("memory.ingest_document"),
11735            );
11736            assert_eq!(
11737                params["details"]["kind"].as_str(),
11738                Some("document"),
11739            );
11740            assert_eq!(
11741                params["details"]["ts_ms"].as_i64(),
11742                Some(1_715_625_999_999),
11743            );
11744        });
11745        h.shutdown(&runtime);
11746    }
11747
11748    /// v0.11.0 P4: two sessions exist; each has its own bridge. An
11749    /// invalidate fires once on the (shared, single-tenant) broadcast
11750    /// and BOTH sessions receive it. Pins that the bridge is correctly
11751    /// per-session-scoped: it doesn't leak to a wrong session AND it
11752    /// doesn't fail to fan out to all sessions of the same tenant.
11753    ///
11754    /// The harness is single-tenant by design, so the "wrong tenant
11755    /// doesn't receive" half is structurally guaranteed (different
11756    /// tenants would have different `invalidate_sender`s — the
11757    /// `mcp_notify` unit tests pin the bridge wiring against a fake
11758    /// channel directly). This integration test pins the
11759    /// per-session-of-same-tenant fan-out behaviour.
11760    #[test]
11761    fn invalidate_event_published_to_correct_session_only() {
11762        let runtime = rt();
11763        let h = Harness::new(&runtime);
11764        let r = h.router.clone();
11765        let store = h.mcp_sessions.clone();
11766        let sender = h.invalidate_sender();
11767        runtime.block_on(async move {
11768            // Allocate two distinct sessions.
11769            let session_id_a = allocate_mcp_session(r.clone()).await;
11770            let session_id_b = allocate_mcp_session(r).await;
11771            assert_ne!(session_id_a, session_id_b);
11772            let state_a = session_state_for_test(&store, &session_id_a);
11773            let state_b = session_state_for_test(&store, &session_id_b);
11774            let mut rx_a = state_a.subscribe_events();
11775            let mut rx_b = state_b.subscribe_events();
11776            // Fire one invalidate.
11777            sender
11778                .send(InvalidateEvent {
11779                    reason: "memory.consolidate".to_string(),
11780                    tenant_id: "default".to_string(),
11781                    ts_ms: 1_715_625_600_000,
11782                    kind: "cluster".to_string(),
11783                })
11784                .expect("at least one subscriber");
11785            // Both sessions' bridges receive it independently.
11786            let a = tokio::time::timeout(std::time::Duration::from_secs(2), rx_a.recv())
11787                .await
11788                .expect("session A receives within 2s")
11789                .expect("session A receiver alive");
11790            let b = tokio::time::timeout(std::time::Duration::from_secs(2), rx_b.recv())
11791                .await
11792                .expect("session B receives within 2s")
11793                .expect("session B receiver alive");
11794            for evt in [&a, &b] {
11795                assert_eq!(evt.event, crate::mcp_session::McpEventKind::Message);
11796                assert_eq!(
11797                    evt.data["params"]["data"].as_str(),
11798                    Some(crate::mcp_notify::MCP_NOTIFICATION_DATA_CONSOLIDATION_UPDATED),
11799                );
11800            }
11801        });
11802        h.shutdown(&runtime);
11803    }
11804
11805    /// v0.11.0 P4: full GET-stream integration. A POST opens a session
11806    /// AND spawns its bridge; an invalidate fires on the tenant's
11807    /// broadcast; a GET subscriber reading the SSE wire format
11808    /// observes the `event: message` SSE frame carrying the spec-shape
11809    /// `notifications/message` envelope.
11810    ///
11811    /// Uses the `Last-Event-ID` resume path with id 0 (sentinel —
11812    /// "I'm a new subscriber, no replay"); the invalidate fires AFTER
11813    /// the GET opens so the live broadcast receiver picks it up.
11814    #[test]
11815    fn mcp_get_subscriber_receives_notifications_message_event() {
11816        let runtime = rt();
11817        let h = Harness::new(&runtime);
11818        let r = h.router.clone();
11819        let sender = h.invalidate_sender();
11820        runtime.block_on(async move {
11821            let session_id = allocate_mcp_session(r.clone()).await;
11822            // Open the GET stream first so the live broadcast receiver
11823            // is attached BEFORE the invalidate fires.
11824            let (status, mut body, _) =
11825                open_mcp_get_stream(r, &session_id, None).await;
11826            assert_eq!(status, StatusCode::OK);
11827            // Drain the init frame.
11828            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
11829                .await
11830                .expect("init event must arrive within 2s");
11831            assert_eq!(
11832                init.event,
11833                crate::mcp_session::MCP_STREAM_EVENT_INIT_NAME,
11834            );
11835            // Now fire the invalidate.
11836            sender
11837                .send(InvalidateEvent {
11838                    reason: "memory.triples_extract".to_string(),
11839                    tenant_id: "default".to_string(),
11840                    ts_ms: 1_715_625_600_000,
11841                    kind: "triple".to_string(),
11842                })
11843                .expect("send must succeed");
11844            // Bridge forwards → SessionState.publish_event → broadcast
11845            // → GET stream consumer → SSE wire frame.
11846            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
11847                .await
11848                .expect("message event must arrive within 2s");
11849            assert_eq!(
11850                ev.event,
11851                crate::mcp_session::MCP_STREAM_EVENT_MESSAGE_NAME,
11852            );
11853            assert_eq!(ev.data["jsonrpc"].as_str(), Some("2.0"));
11854            assert_eq!(
11855                ev.data["method"].as_str(),
11856                Some(crate::mcp_notify::MCP_NOTIFICATION_MESSAGE_METHOD),
11857            );
11858            assert_eq!(
11859                ev.data["params"]["data"].as_str(),
11860                Some(crate::mcp_notify::MCP_NOTIFICATION_DATA_GRAPH_UPDATED),
11861            );
11862            assert_eq!(
11863                ev.data["params"]["details"]["reason"].as_str(),
11864                Some("memory.triples_extract"),
11865            );
11866        });
11867        h.shutdown(&runtime);
11868    }
11869}
11870
#[cfg(test)]
mod cors_tests {
    //! Table-driven checks for `is_localhost_origin`: each test feeds a
    //! list of `Origin` header values through the predicate and
    //! expects all of them to be accepted or all to be rejected.

    use super::is_localhost_origin;

    /// http/https origins on `localhost`, `127.0.0.1` and `[::1]`,
    /// with or without a port, are accepted.
    #[test]
    fn accepts_canonical_localhost_origins() {
        for origin in [
            "http://localhost",
            "http://localhost:3000",
            "https://localhost:8443",
            "http://127.0.0.1",
            "http://127.0.0.1:5173",
            "http://[::1]",
            "http://[::1]:8080",
        ] {
            assert!(is_localhost_origin(origin), "should accept {origin:?}");
        }
    }

    /// Public hostnames and private-LAN addresses are rejected.
    #[test]
    fn rejects_remote_origins() {
        for origin in [
            "http://example.com",
            "https://malicious.example",
            "http://192.168.1.5",
            "http://10.0.0.1",
        ] {
            assert!(!is_localhost_origin(origin), "should reject {origin:?}");
        }
    }

    /// Hostnames that merely contain a loopback name are rejected.
    #[test]
    fn rejects_dns_rebinding_tricks() {
        // Services such as nip.io resolve to 127.0.0.1 while the
        // Origin header still carries the public-DNS name; refusing
        // those closes the rebinding-via-Origin gap.
        for origin in [
            "http://127.0.0.1.nip.io",
            "http://localhost.evil.com",
            "http://evil.localhost",
        ] {
            assert!(!is_localhost_origin(origin), "should reject {origin:?}");
        }
    }

    /// Schemes other than http/https are rejected, even on localhost.
    #[test]
    fn rejects_non_http_schemes() {
        for origin in ["file:///", "ws://localhost:3000", "javascript:alert(1)"] {
            assert!(!is_localhost_origin(origin), "should reject {origin:?}");
        }
    }

    /// Empty, scheme-less and scheme-relative inputs are rejected.
    #[test]
    fn rejects_malformed() {
        for origin in ["", "localhost", "//localhost"] {
            assert!(!is_localhost_origin(origin), "should reject {origin:?}");
        }
    }
}
11918