Skip to main content

solo_api/
http.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! HTTP/JSON transport for Solo. Local-only by default — binds to
4//! `127.0.0.1:<port>` and serves the same operations the MCP server
5//! exposes:
6//!
7//! Episode operations:
8//!   - `POST /memory`                — remember (body: { content, source_type?, source_id? })
9//!   - `POST /memory/search`         — recall  (body: { query, limit? })
10//!   - `GET  /memory/{id}`           — inspect
11//!   - `DELETE /memory/{id}?reason=…` — forget
12//!
13//! Maintenance:
14//!   - `POST /memory/consolidate`    — trigger a consolidation pass
15//!   - `POST /backup`                — encrypted online backup
16//!
17//! Derived-layer (v0.4.0+; queries against the Steward's outputs):
18//!   - `GET  /memory/themes?window_days=N&limit=K`
19//!   - `GET  /memory/facts_about?subject=X&predicate=Y&since_ms=N&until_ms=N&include_as_object=B&limit=K`
20//!   - `GET  /memory/contradictions?limit=K`
21//!   - `GET  /memory/clusters/{cluster_id}?full_content=true` (v0.5.0+)
22//!
23//! Document operations (v0.7.0+):
24//!   - `POST   /memory/documents`               — ingest a file
25//!   - `POST   /memory/documents/search`        — vector search over chunks
26//!   - `GET    /memory/documents`               — paginate documents
27//!   - `GET    /memory/documents/{id}`          — inspect one document
28//!   - `DELETE /memory/documents/{id}`          — soft-delete a document
29//!
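//! Auth is optional at this layer. The default deployment runs
//! unauthenticated: the threat model is local-machine single-user, and
//! binding to `127.0.0.1` keeps the surface off the LAN. Passing an
//! `AuthConfig` (bearer token or OIDC) to `router_with_auth_config` /
//! `serve_http_with_auth_config` protects every route except
//! `GET /health` and `GET /openapi.json`.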
33//!
34//! ## Lifecycle
35//!
//! `serve_http(addr, state, bearer_token, shutdown)` binds to `addr`, runs
//! axum with `with_graceful_shutdown(shutdown)`, and returns when shutdown
//! fires or the listener errors. `solo http-serve` invokes this from inside a
//! `OneShotContext`, so writer + reader pool + lockfile stay live for
//! the server's lifetime and clean up properly afterwards.
41
42use std::convert::Infallible;
43use std::net::SocketAddr;
44use std::str::FromStr;
45use std::sync::Arc;
46use std::time::Duration;
47
48use axum::extract::{FromRequestParts, Path, Query, State};
49use axum::http::request::Parts;
50use axum::http::{HeaderValue, Method, StatusCode};
51use axum::response::sse::{Event, KeepAlive, Sse};
52use axum::response::{IntoResponse, Response};
53use axum::routing::{get, post};
54use axum::{Json, Router};
55use futures::Stream;
56use serde::{Deserialize, Serialize};
57use solo_core::{
58    Confidence, DocumentId, EncodingContext, Episode, InvalidateEvent, MemoryId, TenantId,
59    Tier,
60};
61use solo_storage::{TenantHandle, TenantRegistry};
62use tokio::sync::broadcast;
63use tower_http::cors::{AllowOrigin, CorsLayer};
64use tower_http::trace::TraceLayer;
65
66use crate::auth::{AuthConfig, AuthenticatedPrincipal, middleware::AuthValidator};
67
/// HTTP-side application state shared by every handler.
///
/// v0.8.0 P2 swapped per-handler `WriteHandle + ReaderPool + ...` for a
/// `TenantRegistry` that resolves the tenant on each request (see
/// `TenantExtractor` for the resolution order). The struct is `Clone`
/// because the router takes its own copy via `with_state`.
#[derive(Clone)]
pub struct SoloHttpState {
    /// Multi-tenant registry. Lazy-loads tenants on first request.
    pub registry: Arc<TenantRegistry>,
    /// Tenant used when a request carries neither an authenticated tenant
    /// claim nor an `X-Solo-Tenant` header. Typically
    /// `TenantId::default_tenant()`. Also handed to the auth validator
    /// when an auth config is installed.
    pub default_tenant: TenantId,
    /// Read-path aliases for the canonical `"user"` subject. Sourced
    /// from `solo.config.toml` `[identity] user_aliases`; threaded
    /// through to `solo_query::facts_about` so a query for `"alex"`
    /// also surfaces rows historically extracted as `"user"`. Empty
    /// vec = behave as today. Wrapped in `Arc` so handler `clone()`s
    /// stay cheap. v0.5.0 Priority 1 sub-step 1C.
    pub user_aliases: Arc<Vec<String>>,
}
86
/// Name of the optional HTTP header that routes a request to a specific
/// tenant. When it is absent the request falls back to
/// `state.default_tenant`; an authenticated principal's tenant claim takes
/// precedence over it (see `TenantExtractor`).
///
/// Written in lowercase. The CORS allow-list in `build_cors_layer` repeats
/// the same name as a string literal, so keep the two in sync.
pub const TENANT_HEADER: &str = "x-solo-tenant";
90
91/// Axum extractor that resolves the request's target tenant, then
92/// lazy-opens the tenant via the registry.
93///
94/// Resolution order (v0.8.0 P3):
95///   1. `AuthenticatedPrincipal.tenant_claim` from request extensions —
96///      set by the auth middleware. In OIDC mode this is the validated
97///      value of the configured custom claim (default `solo_tenant`);
98///      in bearer mode this is the daemon's default tenant.
99///   2. `X-Solo-Tenant` header — falls back to this when no
100///      authenticated principal is on the request (unauthenticated
101///      loopback deployments — the default).
102///   3. `state.default_tenant` when neither is present.
103///
104/// Bad header values → 400. Lazy-open failures → 500 unless the failure
105/// kind is `NotFound` (unknown tenant id) → 404.
106pub struct TenantExtractor(pub Arc<TenantHandle>);
107
108impl<S> FromRequestParts<S> for TenantExtractor
109where
110    SoloHttpState: FromRef<S>,
111    S: Send + Sync,
112{
113    type Rejection = ApiError;
114
115    async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
116        let state = SoloHttpState::from_ref(state);
117        // Order: (1) principal.tenant_claim (set by auth middleware),
118        // (2) X-Solo-Tenant header, (3) state.default_tenant.
119        //
120        // The principal wins because in OIDC mode the JWT is the source
121        // of truth — letting the header override an OIDC claim would
122        // be a tenant-impersonation hole.
123        let resolved = if let Some(principal) = parts.extensions.get::<AuthenticatedPrincipal>()
124            && let Some(claim) = principal.tenant_claim.clone()
125        {
126            claim
127        } else {
128            match parts.headers.get(TENANT_HEADER) {
129                None => state.default_tenant.clone(),
130                Some(raw) => {
131                    let s = raw.to_str().map_err(|e| {
132                        ApiError::bad_request(format!(
133                            "{TENANT_HEADER}: header value must be ASCII ({e})"
134                        ))
135                    })?;
136                    TenantId::new(s.to_string()).map_err(|e| {
137                        ApiError::bad_request(format!("{TENANT_HEADER}: invalid tenant id: {e}"))
138                    })?
139                }
140            }
141        };
142        let handle = state.registry.get_or_open(&resolved).await.map_err(|e| {
143            // Map NotFound → 404; everything else → 500.
144            use solo_core::Error;
145            match &e {
146                Error::NotFound(_) => ApiError::not_found(e.to_string()),
147                Error::InvalidInput(_) => ApiError::bad_request(e.to_string()),
148                _ => ApiError::internal(e.to_string()),
149            }
150        })?;
151        Ok(TenantExtractor(handle))
152    }
153}
154
155use axum::extract::FromRef;
156
157/// v0.8.0 P4: extractor that pulls the authenticated principal's
158/// `subject` (JWT `sub` or `"bearer"`) out of request extensions for the
159/// audit log. `None` when no `AuthenticatedPrincipal` is present
160/// (unauthenticated loopback deployments).
161pub struct AuditPrincipal(pub Option<String>);
162
163impl<S> FromRequestParts<S> for AuditPrincipal
164where
165    S: Send + Sync,
166{
167    type Rejection = std::convert::Infallible;
168
169    async fn from_request_parts(
170        parts: &mut Parts,
171        _state: &S,
172    ) -> Result<Self, Self::Rejection> {
173        Ok(AuditPrincipal(
174            parts
175                .extensions
176                .get::<AuthenticatedPrincipal>()
177                .map(|p| p.subject.clone()),
178        ))
179    }
180}
181
182/// v0.10.0: extractor that lifts the full `AuthenticatedPrincipal` out
183/// of request extensions for the `/v1/tenants` handler. Distinct from
184/// `AuditPrincipal` (which only carries `subject: Option<String>`) — the
185/// tenant-list handler needs the `tenant_claim` and `claims` fields to
186/// distinguish bearer (claims = Null) from OIDC (claims = JWT object)
187/// principals.
188///
189/// `None` when no `AuthenticatedPrincipal` is on the request — the
190/// unauthenticated loopback deployment path, which the tenant-list
191/// handler treats as "all tenants visible" (same scope as the
192/// `solo tenants list` CLI). See `docs/dev-log/0119-tenants-list-impl.md`
193/// for the three-case visibility rule.
194pub struct MaybePrincipal(pub Option<AuthenticatedPrincipal>);
195
196impl<S> FromRequestParts<S> for MaybePrincipal
197where
198    S: Send + Sync,
199{
200    type Rejection = std::convert::Infallible;
201
202    async fn from_request_parts(
203        parts: &mut Parts,
204        _state: &S,
205    ) -> Result<Self, Self::Rejection> {
206        Ok(MaybePrincipal(
207            parts
208                .extensions
209                .get::<AuthenticatedPrincipal>()
210                .cloned(),
211        ))
212    }
213}
214
215/// Build the router with optional bearer-token auth (v0.7.x legacy shape).
216///
217/// When `bearer_token` is `Some(t)`, every request except `GET /health`
218/// + `GET /openapi.json` (unauthenticated probes / machine-readable spec)
219/// requires `Authorization: Bearer t`. v0.8.0 P3 routes this through the
220/// new `AuthValidator::Bearer` middleware so an `AuthenticatedPrincipal`
221/// is attached to every authenticated request (the `TenantExtractor`
222/// reads `principal.tenant_claim` ahead of the `X-Solo-Tenant` header).
223pub fn router_with_auth(state: SoloHttpState, bearer_token: Option<String>) -> Router {
224    let auth = bearer_token.map(|token| AuthConfig::Bearer { token });
225    router_with_auth_config(state, auth)
226}
227
228/// Build the router with a config-driven auth block (v0.8.0 P3+).
229///
230/// `auth = Some(AuthConfig::Bearer { token })` is equivalent to passing
231/// `Some(token)` to [`router_with_auth`]. `auth = Some(AuthConfig::Oidc { … })`
232/// installs the OIDC middleware (JWKS fetch + cache + sig + claim checks).
233/// `auth = None` runs unauthenticated — same `127.0.0.1` default as v0.7.x.
234///
235/// Public routes (`/health`, `/openapi.json`) are always exempt from
236/// auth — load balancers, uptime monitors, and codegen tools shouldn't
237/// need credentials.
238pub fn router_with_auth_config(state: SoloHttpState, auth: Option<AuthConfig>) -> Router {
239    let cors = build_cors_layer();
240    // Public, always-unauthenticated routes:
241    //   - GET /health: liveness probe (load balancers, uptime monitors).
242    //   - GET /openapi.json: machine-readable API description for client
243    //     codegen + browser-UI tooling (TypeScript / OpenAPI Generator,
244    //     curl-tools, etc.). The spec describes the API shape, not
245    //     secrets — fine to serve unauthenticated even on a LAN-bound
246    //     instance.
247    let public = Router::new()
248        .route("/health", get(|| async { "ok" }))
249        .route("/openapi.json", get(openapi_handler));
250
251    let authed = Router::new()
252        .route("/memory", post(remember_handler))
253        .route("/memory/search", post(recall_handler))
254        .route("/memory/consolidate", post(consolidate_handler))
255        .route("/memory/{id}", get(inspect_handler).delete(forget_handler))
256        .route("/backup", post(backup_handler))
257        // Path 1 derived-layer endpoints (v0.4.0+). GET-shaped because
258        // these are pure read-only queries; query-string params for
259        // simple filters keep them curl-friendly without a JSON body.
260        .route("/memory/themes", get(themes_handler))
261        .route("/memory/facts_about", get(facts_about_handler))
262        .route("/memory/contradictions", get(contradictions_handler))
263        // v0.5.0 Priority 3: drill into one cluster + abstraction +
264        // episodes. Two-segment path (`/memory/clusters/{id}`) so it
265        // does not shadow the single-segment `/memory/{id}` UUID
266        // inspect route.
267        .route(
268            "/memory/clusters/{cluster_id}",
269            get(inspect_cluster_handler),
270        )
271        // v0.7.0 P6: document operations. Two-segment paths
272        // (`/memory/documents/...`) so they don't shadow the
273        // single-segment `/memory/{id}` episode-inspect route. Order
274        // matters: register the literal `/memory/documents/search`
275        // ahead of `/memory/documents/{id}` so axum's matcher prefers
276        // the literal over the path parameter.
277        .route(
278            "/memory/documents/search",
279            post(search_docs_handler),
280        )
281        .route(
282            "/memory/documents",
283            post(ingest_document_handler).get(list_documents_handler),
284        )
285        .route(
286            "/memory/documents/{id}",
287            get(inspect_document_handler).delete(forget_document_handler),
288        )
289        // v0.9.x: graph drill-down for solo-web. Read-only neighbor
290        // expansion off any node in the memory graph. See
291        // `docs/dev-log/0105-solo-web-scoping.md` §4 + the impl dev log
292        // for the full `/v1/graph/*` family this is the first of.
293        .route("/v1/graph/expand", get(graph_expand_handler))
294        // v0.10.0: paginated catalog reads for solo-web's initial graph
295        // render. See `docs/dev-log/0114-graph-nodes-edges-impl.md`
296        // alongside the same scoping doc.
297        .route("/v1/graph/nodes", get(graph_nodes_handler))
298        .route("/v1/graph/edges", get(graph_edges_handler))
299        // v0.10.0: kind-discriminated full-record drill for solo-web's
300        // inspector panel. See `docs/dev-log/0115-graph-inspect-impl.md`.
301        .route("/v1/graph/inspect/{id}", get(graph_inspect_handler))
302        // v0.10.0: unified explicit + HNSW-semantic neighbors for solo-
303        // web's "show similar" overlay. See
304        // `docs/dev-log/0116-graph-neighbors-impl.md`.
305        .route("/v1/graph/neighbors/{id}", get(graph_neighbors_handler))
306        // v0.10.0: Server-Sent Events stream of graph-data invalidations
307        // for solo-web's live update story. The wire format is
308        // INVALIDATION-shaped (`{reason, tenant_id, ts_ms, kind}`) per
309        // scoping doc §3 Decision C — clients refetch the affected page
310        // on each event rather than receiving row payloads. See
311        // `docs/dev-log/0117-graph-stream-impl.md`.
312        .route("/v1/graph/stream", get(graph_stream_handler))
313        // v0.10.0: principal-scoped tenant list for solo-web's top-bar
314        // tenant picker. Read-only — admin CRUD (create/delete) remains
315        // CLI-only per ADR-0004 §"Admin operations". The visibility
316        // filter is principal-driven: no-auth + bearer principals see
317        // every active tenant; OIDC principals see only the tenant
318        // named by their `tenant_claim`. See
319        // `docs/dev-log/0119-tenants-list-impl.md` + scoping doc §3
320        // Decision F + §4 Route 6.
321        .route("/v1/tenants", get(tenants_list_handler))
322        // v0.10.2: MCP-over-HTTP transport on /mcp. Lets one Solo
323        // process serve both `/v1/graph/*` (REST, for solo-web) and
324        // `/mcp` (JSON-RPC, for solo-jarvis) without the
325        // single-writer-per-data-dir lock dance. See
326        // `docs/dev-log/0129-v0.10.2-mcp-over-http-impl.md` for the spec.
327        // POST + GET share the same path; axum's `MethodRouter` muxes
328        // by HTTP method. OPTIONS is handled by the `CorsLayer`
329        // (already wired below) — we don't need an explicit handler.
330        .route("/mcp", post(mcp_http_post_handler).get(mcp_http_get_handler))
331        .with_state(state.clone());
332
333    let authed = if let Some(cfg) = auth {
334        // v0.8.0 P3: dispatch via AuthValidator (bearer | OIDC), inserts
335        // AuthenticatedPrincipal into request extensions for the
336        // TenantExtractor + audit-log to read.
337        let validator = Arc::new(AuthValidator::from_config(
338            &cfg,
339            state.default_tenant.clone(),
340        ));
341        authed.layer(axum::middleware::from_fn_with_state(
342            validator,
343            crate::auth::middleware::auth_middleware,
344        ))
345    } else {
346        authed
347    };
348
349    public
350        .merge(authed)
351        .layer(cors)
352        .layer(TraceLayer::new_for_http())
353}
354
355/// Convenience wrapper: no auth (loopback-only deployments).
356pub fn router(state: SoloHttpState) -> Router {
357    router_with_auth_config(state, None)
358}
359
360fn build_cors_layer() -> CorsLayer {
361    // Permissive-localhost CORS: allow any localhost / 127.0.0.1 origin so
362    // browser-based UIs running on a different local port can call the API
363    // without preflight friction. We do NOT use `Any` because that would
364    // allow arbitrary remote origins to talk to our localhost server via
365    // a victim's browser. With bearer-token auth enabled the practical
366    // impact is reduced (the cross-origin attacker still can't supply
367    // the token), but principle of least privilege says refuse anyway.
368    //
369    // When the server is bound to a non-loopback address (auth required),
370    // the same CORS predicate keeps localhost-only browser clients —
371    // suitable for trusted-LAN deployments where the LAN client itself
372    // tunnels through ssh/wireguard back to localhost. Wider CORS for
373    // genuine cross-origin browser use is a future config knob.
374    CorsLayer::new()
375        .allow_origin(AllowOrigin::predicate(|origin: &HeaderValue, _req| {
376            origin
377                .to_str()
378                .map(is_localhost_origin)
379                .unwrap_or(false)
380        }))
381        .allow_methods([Method::GET, Method::POST, Method::DELETE, Method::OPTIONS])
382        .allow_headers([
383            axum::http::header::CONTENT_TYPE,
384            axum::http::header::AUTHORIZATION,
385            // Custom Solo headers — browsers preflight-check these and
386            // refuse the actual request if they're not in the allow list.
387            // Without `x-solo-tenant` solo-web's browser fetches all fail
388            // with "Failed to fetch" (CORS preflight rejection).
389            axum::http::HeaderName::from_static("x-solo-tenant"),
390            // v0.10.2: `Mcp-Session-Id` is part of the MCP Streamable
391            // HTTP transport spec (sessions, resumable streams). Solo's
392            // v0.10.2 `/mcp` route does NOT implement sessions yet —
393            // each POST is one-shot — but the header is in the
394            // allow-list ahead of time so browser-based MCP clients
395            // that preflight for it (per the spec) succeed instead of
396            // failing with a CORS error before the first request even
397            // lands. v0.10.3+ wires the actual session affinity.
398            axum::http::HeaderName::from_static("mcp-session-id"),
399        ])
400}
401
/// True if `origin` is `http(s)://localhost[:port]`,
/// `http(s)://127.0.0.1[:port]` or `http(s)://[::1][:port]` (loopback IPv6).
///
/// Anything else is rejected, including:
///   - other schemes, or no scheme at all;
///   - look-alike hosts such as `127.0.0.1.nip.io`;
///   - userinfo tricks such as `http://localhost:3000@evil.example`;
///   - trailing junk after an IPv6 literal (`http://[::1]evil.example:80`);
///   - a port that is empty or not purely ASCII digits.
///
/// A path component, which should not appear in an `Origin` header, is
/// ignored rather than treated as an error.
fn is_localhost_origin(origin: &str) -> bool {
    let rest = match origin
        .strip_prefix("http://")
        .or_else(|| origin.strip_prefix("https://"))
    {
        Some(rest) => rest,
        None => return false,
    };

    // Drop any path (shouldn't appear on Origin headers, but defend anyway).
    let authority = rest.split('/').next().unwrap_or(rest);

    // Split the authority into host and optional port. Bracketed IPv6
    // literals contain ':' themselves, so they are delimited by ']'.
    let (host, port) = if authority.starts_with('[') {
        let close = match authority.find(']') {
            Some(close) => close,
            None => return false,
        };
        let (host, tail) = authority.split_at(close + 1);
        match tail.strip_prefix(':') {
            Some(port) => (host, Some(port)),
            None if tail.is_empty() => (host, None),
            // Anything other than ":port" after the closing bracket means
            // the real host is not the bracketed literal.
            None => return false,
        }
    } else {
        // Split at the FIRST ':' so that everything after the host has to
        // be a well-formed port; `localhost:3000@evil.example` fails here.
        match authority.split_once(':') {
            Some((host, port)) => (host, Some(port)),
            None => (authority, None),
        }
    };

    let port_is_valid = match port {
        None => true,
        Some(digits) => !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()),
    };

    port_is_valid && matches!(host, "localhost" | "127.0.0.1" | "[::1]")
}
431
432/// Bind + serve (v0.7.x legacy shape). `shutdown` is awaited inside
433/// axum's `with_graceful_shutdown`; resolving it triggers a clean drain.
434/// `bearer_token = None` runs unauthenticated (loopback default);
435/// `Some(t)` requires `Authorization: Bearer t` on every request
436/// except `GET /health` + `GET /openapi.json`.
437pub async fn serve_http(
438    addr: SocketAddr,
439    state: SoloHttpState,
440    bearer_token: Option<String>,
441    shutdown: impl std::future::Future<Output = ()> + Send + 'static,
442) -> std::io::Result<()> {
443    let auth = bearer_token.map(|token| AuthConfig::Bearer { token });
444    serve_http_with_auth_config(addr, state, auth, shutdown).await
445}
446
447/// Bind + serve with a config-driven auth block (v0.8.0 P3+).
448/// `auth = None` runs unauthenticated. See [`router_with_auth_config`]
449/// for the auth-mode semantics.
450pub async fn serve_http_with_auth_config(
451    addr: SocketAddr,
452    state: SoloHttpState,
453    auth: Option<AuthConfig>,
454    shutdown: impl std::future::Future<Output = ()> + Send + 'static,
455) -> std::io::Result<()> {
456    let auth_kind = match &auth {
457        Some(AuthConfig::Bearer { .. }) => "bearer",
458        Some(AuthConfig::Oidc { .. }) => "oidc",
459        None => "none",
460    };
461    let app = router_with_auth_config(state, auth);
462    let listener = tokio::net::TcpListener::bind(addr).await?;
463    tracing::info!(%addr, auth = auth_kind, "solo http: listening");
464    axum::serve(listener, app)
465        .with_graceful_shutdown(shutdown)
466        .await
467}
468
469// ---------------------------------------------------------------------------
470// OpenAPI 3.1 spec
471// ---------------------------------------------------------------------------
472
473/// Serve the hand-crafted OpenAPI 3.1 spec at `GET /openapi.json`.
474///
475/// We keep the spec hand-written (rather than deriving via `utoipa`)
476/// for v0.1: 4 simple endpoints, types live across crate boundaries
477/// (`solo_query::RecallResult`, `solo_query::EpisodeRecord`), and a
478/// `utoipa` retrofit would touch every crate. Hand-crafted is one
479/// JSON literal in this file; a smoke test in `handler_tests` parses
480/// the response and asserts the expected paths + components are
481/// present, so drift between spec and code is caught at PR time.
482async fn openapi_handler() -> Json<serde_json::Value> {
483    Json(openapi_spec())
484}
485
486/// Build the OpenAPI 3.1 spec describing Solo's HTTP transport.
487/// Public so the smoke test + future client-codegen tooling can
488/// produce the same document without spinning up the server.
489pub fn openapi_spec() -> serde_json::Value {
490    serde_json::json!({
491        "openapi": "3.1.0",
492        "info": {
493            "title": "Solo HTTP API",
494            "description":
495                "Local-first personal memory daemon. The HTTP transport \
496                 mirrors the four MCP tools (memory_remember / recall / \
497                 inspect / forget). Default deployment is loopback-only \
498                 (127.0.0.1); LAN-bound deployments require a bearer \
499                 token via `solo http-serve --bind <ip> --bearer-token-file <path>`.",
500            "version": env!("CARGO_PKG_VERSION"),
501            "license": { "name": "Apache-2.0" }
502        },
503        "servers": [
504            { "url": "http://127.0.0.1:7437", "description": "Default loopback (replace port with your --http-port)" }
505        ],
506        "components": {
507            "securitySchemes": {
508                "bearerAuth": {
509                    "type": "http",
510                    "scheme": "bearer",
511                    "description":
512                        "Bearer-token auth. Required only on LAN-bound deployments \
513                         (`solo http-serve --bind <non-loopback> --bearer-token-file <path>`); \
514                         the default `127.0.0.1` deployment is unauthenticated. \
515                         `GET /health` and `GET /openapi.json` are exempt from auth even \
516                         on bearer-protected instances."
517                }
518            },
519            "schemas": {
520                "RememberRequest": {
521                    "type": "object",
522                    "required": ["content"],
523                    "properties": {
524                        "content": { "type": "string", "minLength": 1, "description": "Episode content to embed + store." },
525                        "source_type": { "type": "string", "description": "Free-form source tag (e.g. `user_message`, `tool_output`). Defaults to `user_message`." },
526                        "source_id": { "type": "string", "description": "Optional upstream ID for traceability." }
527                    },
528                    "additionalProperties": false
529                },
530                "RememberResponse": {
531                    "type": "object",
532                    "required": ["memory_id"],
533                    "properties": {
534                        "memory_id": { "type": "string", "format": "uuid", "description": "UUID v7 assigned to the new episode." }
535                    }
536                },
537                "RecallRequest": {
538                    "type": "object",
539                    "required": ["query"],
540                    "properties": {
541                        "query": { "type": "string", "minLength": 1, "description": "Natural-language query; embedded by the same model as stored episodes." },
542                        "limit": { "type": "integer", "minimum": 1, "maximum": 50, "default": 5, "description": "Max number of hits to return." }
543                    },
544                    "additionalProperties": false
545                },
546                "RecallResult": {
547                    "type": "object",
548                    "description":
549                        "Recall response. Fields are stable across v0.1 but not exhaustively documented here — \
550                         see `solo_query::RecallResult` in the source for the canonical shape. \
551                         Treat as a forward-compatible JSON object.",
552                    "additionalProperties": true
553                },
554                "ConsolidationScope": {
555                    "type": "object",
556                    "description": "Filter + flags for consolidation. All fields optional; empty body = unbounded defaults.",
557                    "properties": {
558                        "window_days": { "type": "integer", "nullable": true, "description": "Restrict to memories with ts_ms >= now - window_days * 86400000. Null/omitted = unbounded." },
559                        "force_merge": { "type": "boolean", "default": false, "description": "Run the existing-vs-existing merge + abstraction-regen passes even with zero unclustered candidates. Drift catch-up on quiet corpora. Added in 0.3.1." }
560                    },
561                    "additionalProperties": false
562                },
563                "ConsolidationReport": {
564                    "type": "object",
565                    "required": [
566                        "episodes_seen", "clusters_built", "clusters_merged",
567                        "clusters_absorbed", "existing_clusters_merged",
568                        "episodes_clustered", "abstractions_built",
569                        "abstractions_regenerated", "triples_built",
570                        "contradictions_found"
571                    ],
572                    "properties": {
573                        "episodes_seen":             { "type": "integer", "minimum": 0 },
574                        "clusters_built":            { "type": "integer", "minimum": 0, "description": "Brand-new clusters that survived to be persisted (post in-run-merge, post cross-run-absorb)." },
575                        "clusters_merged":           { "type": "integer", "minimum": 0, "description": "In-run merge: clusters absorbed into a sibling within this consolidate run (cross-UTC-bucket case). Counts losers." },
576                        "clusters_absorbed":         { "type": "integer", "minimum": 0, "description": "Cross-run absorb: freshly-built clusters folded into a pre-existing DB cluster with a similar centroid. Counts new-side clusters." },
577                        "existing_clusters_merged":  { "type": "integer", "minimum": 0, "description": "Existing-vs-existing merge: pre-existing DB clusters that drifted toward each other and now coalesce. Counts losers." },
578                        "episodes_clustered":        { "type": "integer", "minimum": 0 },
579                        "abstractions_built":        { "type": "integer", "minimum": 0, "description": "Fresh abstractions persisted for newly-built clusters. 0 when no LlmClient is wired." },
580                        "abstractions_regenerated":  { "type": "integer", "minimum": 0, "description": "Existing clusters whose stale abstractions were dropped and rebuilt because absorb or existing-merge changed their episode set. 0 without an LlmClient." },
581                        "triples_built":             { "type": "integer", "minimum": 0 },
582                        "contradictions_found":      { "type": "integer", "minimum": 0 }
583                    }
584                },
585                "EpisodeRecord": {
586                    "type": "object",
587                    "description":
588                        "Inspect response: full episode record. Fields are stable across v0.1 but not \
589                         exhaustively documented here — see `solo_query::EpisodeRecord` in the source. \
590                         Treat as a forward-compatible JSON object.",
591                    "additionalProperties": true
592                },
593                "ThemeHit": {
594                    "type": "object",
595                    "description":
596                        "One cluster + its (optional) abstraction. Returned by GET /memory/themes. \
597                         See `solo_query::ThemeHit` for the canonical shape: cluster_id, \
598                         abstraction_id?, abstraction_text?, episode_count, coherence, created_at_ms.",
599                    "additionalProperties": true
600                },
601                "FactHit": {
602                    "type": "object",
603                    "description":
604                        "One Steward-extracted SPO triple. Returned by GET /memory/facts_about. \
605                         See `solo_query::FactHit` for fields: triple_id, subject_id, predicate, \
606                         object_id, object_kind, valid_from_ms, valid_to_ms?, confidence, cluster_id?.",
607                    "additionalProperties": true
608                },
609                "ContradictionHit": {
610                    "type": "object",
611                    "description":
612                        "One Steward-flagged contradiction with each side's triple LEFT JOIN'd in. \
613                         Returned by GET /memory/contradictions. See `solo_query::ContradictionHit`: \
614                         a_id, b_id, kind, explanation, detected_at_ms, a_triple?, b_triple?.",
615                    "additionalProperties": true
616                },
617                "ClusterRecord": {
618                    "type": "object",
619                    "description":
620                        "Snapshot of one cluster — its row, optional abstraction, and source episodes \
621                         (content truncated to 200 chars unless ?full_content=true). Returned by \
622                         GET /memory/clusters/{cluster_id}. See `solo_query::ClusterRecord`.",
623                    "additionalProperties": true
624                },
625                "IngestDocumentRequest": {
626                    "type": "object",
627                    "required": ["path"],
628                    "properties": {
629                        "path": {
630                            "type": "string",
631                            "minLength": 1,
632                            "description":
633                                "Server-side absolute path to the file to ingest. The file must be \
634                                 readable by the Solo process. Supported formats: plaintext / \
635                                 markdown / code, HTML, PDF."
636                        }
637                    },
638                    "additionalProperties": false
639                },
640                "IngestReport": {
641                    "type": "object",
642                    "description":
643                        "Returned by POST /memory/documents. Reports the document id assigned, \
644                         the number of chunks persisted + embedded, the total byte size, and a \
645                         `deduped` flag (true when the same content_hash was already present and \
646                         the existing doc_id was returned unchanged). See `solo_storage::IngestReport`.",
647                    "required": ["doc_id", "chunks_persisted", "bytes_ingested", "deduped"],
648                    "properties": {
649                        "doc_id":            { "type": "string", "format": "uuid" },
650                        "chunks_persisted":  { "type": "integer", "minimum": 0 },
651                        "bytes_ingested":    { "type": "integer", "minimum": 0, "format": "int64" },
652                        "deduped":           { "type": "boolean" }
653                    },
654                    "additionalProperties": false
655                },
656                "ForgetDocumentReport": {
657                    "type": "object",
658                    "description":
659                        "Returned by DELETE /memory/documents/{id}. Reports the doc_id soft-deleted \
660                         and how many chunk rowids were tombstoned in the HNSW index. The chunk rows \
661                         themselves survive in SQL for forensic value. See `solo_storage::ForgetDocumentReport`.",
662                    "required": ["doc_id", "chunks_tombstoned"],
663                    "properties": {
664                        "doc_id":             { "type": "string", "format": "uuid" },
665                        "chunks_tombstoned":  { "type": "integer", "minimum": 0 }
666                    },
667                    "additionalProperties": false
668                },
669                "SearchDocsRequest": {
670                    "type": "object",
671                    "required": ["query"],
672                    "properties": {
673                        "query": { "type": "string", "minLength": 1 },
674                        "limit": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 }
675                    },
676                    "additionalProperties": false
677                },
678                "DocSearchHit": {
679                    "type": "object",
680                    "description":
681                        "One chunk hit + parent-doc context. Fields per `solo_query::DocSearchHit`: \
682                         chunk_id, doc_id, doc_title?, doc_source?, doc_mime_type?, chunk_index, \
683                         content, cos_distance, start_offset, end_offset.",
684                    "additionalProperties": true
685                },
686                "DocumentInspectResult": {
687                    "type": "object",
688                    "description":
689                        "Returned by GET /memory/documents/{id}. A `document` record (full metadata) \
690                         plus an ordered list of chunk summaries (each preview truncated to 200 \
691                         chars). See `solo_query::DocumentInspectResult`.",
692                    "additionalProperties": true
693                },
694                "DocumentSummary": {
695                    "type": "object",
696                    "description":
697                        "One row from GET /memory/documents. Fields per `solo_query::DocumentSummary`: \
698                         doc_id, title?, source?, mime_type?, ingested_at_ms, chunk_count, status.",
699                    "additionalProperties": true
700                },
701                "ApiError": {
702                    "type": "object",
703                    "required": ["error", "status"],
704                    "properties": {
705                        "error": { "type": "string" },
706                        "status": { "type": "integer", "minimum": 400, "maximum": 599 }
707                    }
708                }
709            }
710        },
711        "paths": {
712            "/health": {
713                "get": {
714                    "summary": "Liveness probe",
715                    "description": "Returns plain text `ok`. Always unauthenticated.",
716                    "responses": {
717                        "200": {
718                            "description": "Server is up.",
719                            "content": { "text/plain": { "schema": { "type": "string", "example": "ok" } } }
720                        }
721                    }
722                }
723            },
724            "/openapi.json": {
725                "get": {
726                    "summary": "Self-describing OpenAPI 3.1 spec",
727                    "description": "Returns this document. Always unauthenticated.",
728                    "responses": {
729                        "200": {
730                            "description": "OpenAPI 3.1 document.",
731                            "content": { "application/json": { "schema": { "type": "object" } } }
732                        }
733                    }
734                }
735            },
736            "/memory": {
737                "post": {
738                    "summary": "Remember (store an episode)",
739                    "description": "Equivalent to MCP tool `memory_remember`.",
740                    "security": [{ "bearerAuth": [] }, {}],
741                    "requestBody": {
742                        "required": true,
743                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RememberRequest" } } }
744                    },
745                    "responses": {
746                        "200": {
747                            "description": "Memory stored; returns the new MemoryId.",
748                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RememberResponse" } } }
749                        },
750                        "400": { "description": "Bad request (e.g. empty content).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
751                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
752                    }
753                }
754            },
755            "/memory/search": {
756                "post": {
757                    "summary": "Recall (vector search)",
758                    "description": "Equivalent to MCP tool `memory_recall`. Embeds the query, runs HNSW search, returns the top-K hits in cosine-distance order.",
759                    "security": [{ "bearerAuth": [] }, {}],
760                    "requestBody": {
761                        "required": true,
762                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RecallRequest" } } }
763                    },
764                    "responses": {
765                        "200": {
766                            "description": "Search results.",
767                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RecallResult" } } }
768                        },
769                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
770                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
771                    }
772                }
773            },
774            "/memory/consolidate": {
775                "post": {
776                    "summary": "Run a consolidation pass (clustering + abstraction)",
777                    "description":
778                        "Idempotent. Triggers the SWS-equivalent clustering pass; if a `Steward` LLM is wired \
779                         on the server, also runs the REM-equivalent abstraction pass that populates \
780                         `semantic_abstractions` and `triples`. Empty request body = default scope (unbounded \
781                         window). Equivalent to the `solo consolidate` CLI.",
782                    "security": [{ "bearerAuth": [] }, {}],
783                    "requestBody": {
784                        "required": false,
785                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConsolidationScope" } } }
786                    },
787                    "responses": {
788                        "200": {
789                            "description": "Consolidation complete; report counts the work done.",
790                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConsolidationReport" } } }
791                        },
792                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
793                    }
794                }
795            },
796            "/backup": {
797                "post": {
798                    "summary": "Online encrypted backup",
799                    "description":
800                        "Run an online SQLCipher backup of the live data dir to a server-side path. \
801                         The destination file is encrypted with the same Argon2id-derived raw key as \
802                         the source, so it restores under the same passphrase + a copy of the source's \
803                         `solo.config.toml`. Hot — the backup runs against the writer's existing \
804                         connection without taking the lockfile, so the daemon keeps serving reads + \
805                         writes during the operation. v0.3.2+.",
806                    "security": [{ "bearerAuth": [] }, {}],
807                    "requestBody": {
808                        "required": true,
809                        "content": { "application/json": { "schema": {
810                            "type": "object",
811                            "properties": {
812                                "to": { "type": "string", "description": "Server-side absolute path for the backup file." },
813                                "force": { "type": "boolean", "description": "Overwrite an existing destination file. Default false.", "default": false }
814                            },
815                            "required": ["to"]
816                        } } }
817                    },
818                    "responses": {
819                        "200": {
820                            "description": "Backup complete; reports the destination path + elapsed milliseconds.",
821                            "content": { "application/json": { "schema": {
822                                "type": "object",
823                                "properties": {
824                                    "path": { "type": "string" },
825                                    "elapsed_ms": { "type": "integer", "format": "int64" }
826                                }
827                            } } }
828                        },
829                        "400": { "description": "Destination invalid, exists without force, or its parent doesn't exist." },
830                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
831                        "500": { "description": "Backup failed (disk full, permission denied, etc.)." }
832                    }
833                }
834            },
835            "/memory/{id}": {
836                "get": {
837                    "summary": "Inspect a memory by ID",
838                    "description": "Equivalent to MCP tool `memory_inspect`.",
839                    "security": [{ "bearerAuth": [] }, {}],
840                    "parameters": [{
841                        "name": "id",
842                        "in": "path",
843                        "required": true,
844                        "schema": { "type": "string", "format": "uuid" },
845                        "description": "MemoryId (UUID v7)."
846                    }],
847                    "responses": {
848                        "200": {
849                            "description": "Episode record.",
850                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/EpisodeRecord" } } }
851                        },
852                        "400": { "description": "Malformed ID.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
853                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
854                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
855                    }
856                },
857                "delete": {
858                    "summary": "Forget (soft-delete) a memory by ID",
859                    "description":
860                        "Equivalent to MCP tool `memory_forget`. Soft-delete: flips `episodes.status = 'forgotten'` \
861                         and tombstones the HNSW vector. The row + embedding are preserved for forensics; \
862                         re-running `solo reembed` after this does NOT restore visibility.",
863                    "security": [{ "bearerAuth": [] }, {}],
864                    "parameters": [
865                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } },
866                        { "name": "reason", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Free-form reason logged via tracing (not yet persisted to the DB)." }
867                    ],
868                    "responses": {
869                        "204": { "description": "Forgotten (or already forgotten — idempotent)." },
870                        "400": { "description": "Malformed ID.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
871                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
872                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
873                    }
874                }
875            },
876            "/memory/themes": {
877                "get": {
878                    "summary": "List recent cluster themes",
879                    "description":
880                        "Equivalent to MCP tool `memory_themes`. List cluster abstractions ordered by \
881                         most-recent first. Use to surface 'what has the user been thinking about lately' \
882                         without paging through individual episodes. v0.4.0+.",
883                    "security": [{ "bearerAuth": [] }, {}],
884                    "parameters": [
885                        { "name": "window_days", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1 }, "description": "Optional time window. Omit for unfiltered (all-time, most-recent first)." },
886                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
887                    ],
888                    "responses": {
889                        "200": {
890                            "description": "Array of ThemeHits (possibly empty).",
891                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ThemeHit" } } } }
892                        },
893                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
894                    }
895                }
896            },
897            "/memory/facts_about": {
898                "get": {
899                    "summary": "Query the SPO knowledge graph by subject",
900                    "description":
901                        "Equivalent to MCP tool `memory_facts_about`. Query Steward-extracted triples by \
902                         subject + optional predicate + optional time window. Subject is required \
903                         (predicate-only scans not supported). Pass `include_as_object=true` (v0.5.1+) \
904                         to also surface rows where `subject` appears as the object. v0.4.0+.",
905                    "security": [{ "bearerAuth": [] }, {}],
906                    "parameters": [
907                        { "name": "subject", "in": "query", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Subject id to query (e.g. `Sam`)." },
908                        { "name": "predicate", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Optional predicate filter (e.g. `works_at`)." },
909                        { "name": "since_ms", "in": "query", "required": false, "schema": { "type": "integer" }, "description": "Optional valid_from_ms lower bound (epoch ms)." },
910                        { "name": "until_ms", "in": "query", "required": false, "schema": { "type": "integer" }, "description": "Optional valid_to_ms upper bound (epoch ms). NULL upper bounds (still-valid facts) pass through." },
911                        { "name": "include_as_object", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "If true, also match rows where `subject` appears as the object (e.g. surface 'Sam pushes back on PRs about Maya' under subject='Maya'). Default false. v0.5.1+." },
912                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
913                    ],
914                    "responses": {
915                        "200": {
916                            "description": "Array of FactHits (possibly empty).",
917                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/FactHit" } } } }
918                        },
919                        "400": { "description": "Bad request (e.g. empty subject).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
920                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
921                    }
922                }
923            },
924            "/memory/contradictions": {
925                "get": {
926                    "summary": "List Steward-flagged contradictions",
927                    "description":
928                        "Equivalent to MCP tool `memory_contradictions`. Each result includes both \
929                         sides' triple SPO via LEFT JOIN for context. v0.4.0+.",
930                    "security": [{ "bearerAuth": [] }, {}],
931                    "parameters": [
932                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
933                    ],
934                    "responses": {
935                        "200": {
936                            "description": "Array of ContradictionHits (possibly empty).",
937                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ContradictionHit" } } } }
938                        },
939                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
940                    }
941                }
942            },
943            "/memory/clusters/{cluster_id}": {
944                "get": {
945                    "summary": "Inspect a single cluster",
946                    "description":
947                        "Equivalent to MCP tool `memory_inspect_cluster`. Returns the cluster row, \
948                         its (optional) abstraction, and its source episodes. By default each \
949                         episode's `content` is truncated to 200 chars with a trailing `…`. Pass \
950                         `?full_content=true` to get verbatim episode content. v0.5.0+.",
951                    "security": [{ "bearerAuth": [] }, {}],
952                    "parameters": [
953                        { "name": "cluster_id", "in": "path", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Cluster id (from a previous GET /memory/themes response)." },
954                        { "name": "full_content", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "If true, return episode content verbatim. Default false (truncate to 200 chars + ellipsis)." }
955                    ],
956                    "responses": {
957                        "200": {
958                            "description": "Cluster snapshot.",
959                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ClusterRecord" } } }
960                        },
961                        "400": { "description": "Bad request (e.g. empty cluster_id).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
962                        "404": { "description": "No such cluster.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
963                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
964                    }
965                }
966            },
967            "/memory/documents": {
968                "post": {
969                    "summary": "Ingest a document",
970                    "description":
971                        "Equivalent to MCP tool `memory_ingest_document`. Reads the file at the \
972                         supplied server-side path, parses + chunks + embeds, and persists under \
973                         `documents` + `document_chunks`. Returns the new doc_id, chunk count, and \
974                         a `deduped` flag (true when an existing document with the same content_hash \
975                         was returned without re-embedding). v0.7.0+.",
976                    "security": [{ "bearerAuth": [] }, {}],
977                    "requestBody": {
978                        "required": true,
979                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/IngestDocumentRequest" } } }
980                    },
981                    "responses": {
982                        "200": {
983                            "description": "Document ingested (or deduplicated).",
984                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/IngestReport" } } }
985                        },
986                        "400": { "description": "Bad request (e.g. empty path, file unreadable, parse error).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
987                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
988                    }
989                },
990                "get": {
991                    "summary": "List ingested documents (paginated)",
992                    "description":
993                        "Equivalent to MCP tool `memory_list_documents`. Returns a paginated index, \
994                         newest first. Forgotten documents are hidden by default; pass \
995                         `?include_forgotten=true` to see them too. v0.7.0+.",
996                    "security": [{ "bearerAuth": [] }, {}],
997                    "parameters": [
998                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 20 } },
999                        { "name": "offset", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 0, "default": 0 } },
1000                        { "name": "include_forgotten", "in": "query", "required": false, "schema": { "type": "boolean", "default": false } }
1001                    ],
1002                    "responses": {
1003                        "200": {
1004                            "description": "Array of DocumentSummary (possibly empty).",
1005                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/DocumentSummary" } } } }
1006                        },
1007                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1008                    }
1009                }
1010            },
1011            "/memory/documents/search": {
1012                "post": {
1013                    "summary": "Vector search across document chunks",
1014                    "description":
1015                        "Equivalent to MCP tool `memory_search_docs`. Embeds the query and returns \
1016                         up to `limit` matching chunks, best match first, each annotated with the \
1017                         parent document's title + source path. Forgotten documents are excluded. \
1018                         v0.7.0+.",
1019                    "security": [{ "bearerAuth": [] }, {}],
1020                    "requestBody": {
1021                        "required": true,
1022                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SearchDocsRequest" } } }
1023                    },
1024                    "responses": {
1025                        "200": {
1026                            "description": "Array of DocSearchHits (possibly empty).",
1027                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/DocSearchHit" } } } }
1028                        },
1029                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1030                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1031                    }
1032                }
1033            },
1034            "/memory/documents/{id}": {
1035                "get": {
1036                    "summary": "Inspect one document",
1037                    "description":
1038                        "Equivalent to MCP tool `memory_inspect_document`. Returns the document's \
1039                         metadata plus a preview of every chunk (truncated to 200 chars). v0.7.0+.",
1040                    "security": [{ "bearerAuth": [] }, {}],
1041                    "parameters": [
1042                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "DocumentId (UUID v7)." }
1043                    ],
1044                    "responses": {
1045                        "200": {
1046                            "description": "Document inspection result.",
1047                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/DocumentInspectResult" } } }
1048                        },
1049                        "400": { "description": "Malformed id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1050                        "404": { "description": "No such document.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1051                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1052                    }
1053                },
1054                "delete": {
1055                    "summary": "Forget (soft-delete) one document",
1056                    "description":
1057                        "Equivalent to MCP tool `memory_forget_document`. Flips `documents.status` \
1058                         to `forgotten` and tombstones every chunk's HNSW rowid. The chunk rows \
1059                         survive in SQL for forensic value. v0.7.0+.",
1060                    "security": [{ "bearerAuth": [] }, {}],
1061                    "parameters": [
1062                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } }
1063                    ],
1064                    "responses": {
1065                        "200": {
1066                            "description": "Document soft-deleted; report counts chunks tombstoned.",
1067                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ForgetDocumentReport" } } }
1068                        },
1069                        "400": { "description": "Malformed id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1070                        "404": { "description": "No such document.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
1071                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
1072                    }
1073                }
1074            }
1075        }
1076    })
1077}
1078
// ---------------------------------------------------------------------------
// Handlers
// ---------------------------------------------------------------------------
1082
1083#[derive(Debug, Deserialize)]
1084struct RememberBody {
1085    content: String,
1086    #[serde(default)]
1087    source_type: Option<String>,
1088    #[serde(default)]
1089    source_id: Option<String>,
1090}
1091
1092#[derive(Debug, Serialize)]
1093struct RememberResponse {
1094    memory_id: String,
1095}
1096
1097async fn remember_handler(
1098    TenantExtractor(tenant): TenantExtractor,
1099    AuditPrincipal(principal): AuditPrincipal,
1100    Json(body): Json<RememberBody>,
1101) -> Result<Json<RememberResponse>, ApiError> {
1102    let content = body.content.trim_end().to_string();
1103    if content.is_empty() {
1104        return Err(ApiError::bad_request("content must not be empty"));
1105    }
1106    let embedding = tenant.embedder().embed(&content).await.map_err(ApiError::from)?;
1107    let episode = Episode {
1108        memory_id: MemoryId::new(),
1109        ts_ms: chrono::Utc::now().timestamp_millis(),
1110        source_type: body.source_type.unwrap_or_else(|| "user_message".into()),
1111        source_id: body.source_id,
1112        content,
1113        encoding_context: EncodingContext::default(),
1114        provenance: None,
1115        confidence: Confidence::new(0.9).unwrap(),
1116        strength: 0.5,
1117        salience: 0.5,
1118        tier: Tier::Hot,
1119    };
1120    let mid = tenant
1121        .write()
1122        .remember_as(principal, episode, embedding)
1123        .await
1124        .map_err(ApiError::from)?;
1125    Ok(Json(RememberResponse {
1126        memory_id: mid.to_string(),
1127    }))
1128}
1129
/// JSON request body accepted by `recall_handler`.
#[derive(Debug, Deserialize)]
struct RecallBody {
    /// Query text, forwarded unchanged to `solo_query::run_recall`.
    query: String,
    /// Requested number of hits; `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1136
/// Serde fallback for the `limit` fields in this module that are declared
/// with `#[serde(default = "default_limit")]`.
fn default_limit() -> usize {
    const FALLBACK_LIMIT: usize = 5;
    FALLBACK_LIMIT
}
1140
1141async fn recall_handler(
1142    TenantExtractor(tenant): TenantExtractor,
1143    AuditPrincipal(principal): AuditPrincipal,
1144    Json(body): Json<RecallBody>,
1145) -> Result<Json<solo_query::RecallResult>, ApiError> {
1146    // solo_query::run_recall handles empty-query rejection (returns
1147    // InvalidInput → ApiError::bad_request(400)) and clamps limit
1148    // upstream of the embedder call.
1149    let result = solo_query::run_recall(tenant.as_ref(), principal, &body.query, body.limit)
1150        .await
1151        .map_err(ApiError::from)?;
1152    Ok(Json(result))
1153}
1154
1155async fn inspect_handler(
1156    TenantExtractor(tenant): TenantExtractor,
1157    AuditPrincipal(principal): AuditPrincipal,
1158    Path(id): Path<String>,
1159) -> Result<Json<solo_query::EpisodeRecord>, ApiError> {
1160    let mid = MemoryId::from_str(&id)
1161        .map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1162    let row = solo_query::inspect_one(tenant.read(), tenant.audit(), principal, mid)
1163        .await
1164        .map_err(ApiError::from)?;
1165    Ok(Json(row))
1166}
1167
// Path 1 derived-layer handlers (v0.4.0+; the cluster inspector joined in
// v0.5.0). All of them are GET-shaped: pure read-only queries against the
// Steward's outputs, with query-string params for simple filters. Each
// handler delegates to a single solo_query::derived pipeline. The three
// list endpoints (themes, facts_about, contradictions) return the result
// Vec as JSON, so an empty derived layer → 200 with a `[]` body (parseable
// JSON array); the cluster inspector returns a single record instead.
1173
/// Query-string parameters accepted by `themes_handler`.
#[derive(Debug, Deserialize)]
struct ThemesQuery {
    /// Optional look-back window in days, passed straight through to
    /// `solo_query::themes`; `None` when omitted.
    #[serde(default)]
    window_days: Option<i64>,
    /// Requested number of hits; `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1181
1182async fn themes_handler(
1183    TenantExtractor(tenant): TenantExtractor,
1184    AuditPrincipal(principal): AuditPrincipal,
1185    Query(q): Query<ThemesQuery>,
1186) -> Result<Json<Vec<solo_query::ThemeHit>>, ApiError> {
1187    let hits = solo_query::themes(
1188        tenant.read(),
1189        tenant.audit(),
1190        principal,
1191        q.window_days,
1192        q.limit,
1193    )
1194    .await
1195    .map_err(ApiError::from)?;
1196    Ok(Json(hits))
1197}
1198
/// Query-string parameters accepted by `facts_about_handler`.
#[derive(Debug, Deserialize)]
struct FactsAboutQuery {
    /// Subject to look up. Required; the handler rejects a blank value.
    subject: String,
    /// Optional predicate filter.
    #[serde(default)]
    predicate: Option<String>,
    /// Optional lower time bound in milliseconds, forwarded as-is.
    #[serde(default)]
    since_ms: Option<i64>,
    /// Optional upper time bound in milliseconds, forwarded as-is.
    #[serde(default)]
    until_ms: Option<i64>,
    /// v0.5.1 Priority 8 — widen the query to also match rows where
    /// `subject` appears as the object. Default `false`.
    #[serde(default)]
    include_as_object: bool,
    /// Requested number of hits; `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1215
1216async fn facts_about_handler(
1217    State(s): State<SoloHttpState>,
1218    TenantExtractor(tenant): TenantExtractor,
1219    AuditPrincipal(principal): AuditPrincipal,
1220    Query(q): Query<FactsAboutQuery>,
1221) -> Result<Json<Vec<solo_query::FactHit>>, ApiError> {
1222    if q.subject.trim().is_empty() {
1223        return Err(ApiError::bad_request("subject must not be empty"));
1224    }
1225    let hits = solo_query::facts_about(
1226        tenant.read(),
1227        tenant.audit(),
1228        principal,
1229        &q.subject,
1230        &s.user_aliases,
1231        q.include_as_object,
1232        q.predicate.as_deref(),
1233        q.since_ms,
1234        q.until_ms,
1235        q.limit,
1236    )
1237    .await
1238    .map_err(ApiError::from)?;
1239    Ok(Json(hits))
1240}
1241
/// Query-string parameters accepted by `contradictions_handler`.
#[derive(Debug, Deserialize)]
struct ContradictionsQuery {
    /// Requested number of hits; `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1247
1248async fn contradictions_handler(
1249    TenantExtractor(tenant): TenantExtractor,
1250    AuditPrincipal(principal): AuditPrincipal,
1251    Query(q): Query<ContradictionsQuery>,
1252) -> Result<Json<Vec<solo_query::ContradictionHit>>, ApiError> {
1253    let hits = solo_query::contradictions(tenant.read(), tenant.audit(), principal, q.limit)
1254        .await
1255        .map_err(ApiError::from)?;
1256    Ok(Json(hits))
1257}
1258
/// Query-string parameters accepted by `inspect_cluster_handler`.
#[derive(Debug, Deserialize, Default)]
struct InspectClusterQuery {
    /// Default `false` — episode `content` is truncated to
    /// `solo_query::EPISODE_TRUNCATE_CHARS` chars with a trailing `…`.
    /// `?full_content=true` returns each episode's content verbatim.
    #[serde(default)]
    full_content: bool,
}
1267
1268async fn inspect_cluster_handler(
1269    TenantExtractor(tenant): TenantExtractor,
1270    AuditPrincipal(principal): AuditPrincipal,
1271    Path(cluster_id): Path<String>,
1272    Query(q): Query<InspectClusterQuery>,
1273) -> Result<Json<solo_query::ClusterRecord>, ApiError> {
1274    if cluster_id.trim().is_empty() {
1275        return Err(ApiError::bad_request("cluster_id must not be empty"));
1276    }
1277    let record = solo_query::inspect_cluster(
1278        tenant.read(),
1279        tenant.audit(),
1280        principal,
1281        &cluster_id,
1282        q.full_content,
1283    )
1284    .await
1285    .map_err(ApiError::from)?;
1286    Ok(Json(record))
1287}
1288
1289// ---------------------------------------------------------------------------
1290// Document handlers (v0.7.0 P6)
1291// ---------------------------------------------------------------------------
1292
/// JSON request body accepted by `ingest_document_handler`.
#[derive(Debug, Deserialize)]
struct IngestDocumentBody {
    /// Server-side absolute path to the file. Must be readable by the
    /// Solo process. The writer reads, parses, chunks, and embeds.
    // NOTE(review): the handler only rejects a blank value; it does not
    // check that the path is absolute — confirm whether the writer does.
    path: String,
}
1299
1300async fn ingest_document_handler(
1301    TenantExtractor(tenant): TenantExtractor,
1302    AuditPrincipal(principal): AuditPrincipal,
1303    Json(body): Json<IngestDocumentBody>,
1304) -> Result<Json<solo_storage::IngestReport>, ApiError> {
1305    if body.path.trim().is_empty() {
1306        return Err(ApiError::bad_request("path must not be empty"));
1307    }
1308    let path = std::path::PathBuf::from(body.path);
1309    let chunk_config = solo_storage::document::ChunkConfig::default();
1310    let report = tenant
1311        .write()
1312        .ingest_document_as(principal, path, chunk_config)
1313        .await
1314        .map_err(ApiError::from)?;
1315    Ok(Json(report))
1316}
1317
/// JSON request body accepted by `search_docs_handler`.
#[derive(Debug, Deserialize)]
struct SearchDocsBody {
    /// Query text, forwarded unchanged to `solo_query::run_doc_search`.
    query: String,
    /// Requested number of hits; `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}
1324
1325async fn search_docs_handler(
1326    TenantExtractor(tenant): TenantExtractor,
1327    AuditPrincipal(principal): AuditPrincipal,
1328    Json(body): Json<SearchDocsBody>,
1329) -> Result<Json<Vec<solo_query::DocSearchHit>>, ApiError> {
1330    let hits = solo_query::run_doc_search(tenant.as_ref(), principal, &body.query, body.limit)
1331        .await
1332        .map_err(ApiError::from)?;
1333    Ok(Json(hits))
1334}
1335
1336async fn inspect_document_handler(
1337    TenantExtractor(tenant): TenantExtractor,
1338    AuditPrincipal(principal): AuditPrincipal,
1339    Path(id): Path<String>,
1340) -> Result<Json<solo_query::DocumentInspectResult>, ApiError> {
1341    let doc_id = DocumentId::from_str(&id)
1342        .map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1343    let result_opt =
1344        solo_query::inspect_document(tenant.read(), tenant.audit(), principal, &doc_id)
1345            .await
1346            .map_err(ApiError::from)?;
1347    match result_opt {
1348        Some(record) => Ok(Json(record)),
1349        None => Err(ApiError::not_found(format!("document {doc_id} not found"))),
1350    }
1351}
1352
/// Query-string parameters accepted by `list_documents_handler`.
#[derive(Debug, Deserialize)]
struct ListDocumentsQuery {
    /// Page size; `default_list_documents_limit()` when omitted.
    #[serde(default = "default_list_documents_limit")]
    limit: usize,
    /// Number of rows to skip; `0` when omitted.
    #[serde(default)]
    offset: usize,
    /// Forwarded to `solo_query::list_documents`; `false` when omitted.
    #[serde(default)]
    include_forgotten: bool,
}
1362
/// Serde fallback for `ListDocumentsQuery::limit`.
fn default_list_documents_limit() -> usize {
    const FALLBACK_PAGE_SIZE: usize = 20;
    FALLBACK_PAGE_SIZE
}
1366
1367async fn list_documents_handler(
1368    TenantExtractor(tenant): TenantExtractor,
1369    AuditPrincipal(principal): AuditPrincipal,
1370    Query(q): Query<ListDocumentsQuery>,
1371) -> Result<Json<Vec<solo_query::DocumentSummary>>, ApiError> {
1372    let rows = solo_query::list_documents(
1373        tenant.read(),
1374        tenant.audit(),
1375        principal,
1376        q.limit,
1377        q.offset,
1378        q.include_forgotten,
1379    )
1380    .await
1381    .map_err(ApiError::from)?;
1382    Ok(Json(rows))
1383}
1384
1385async fn forget_document_handler(
1386    TenantExtractor(tenant): TenantExtractor,
1387    AuditPrincipal(principal): AuditPrincipal,
1388    Path(id): Path<String>,
1389) -> Result<Json<solo_storage::ForgetDocumentReport>, ApiError> {
1390    let doc_id = DocumentId::from_str(&id)
1391        .map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1392    let report = tenant
1393        .write()
1394        .forget_document_as(principal, doc_id)
1395        .await
1396        .map_err(ApiError::from)?;
1397    Ok(Json(report))
1398}
1399
/// Query-string parameters accepted by `forget_handler`.
#[derive(Debug, Deserialize)]
struct ForgetQuery {
    /// Optional free-text reason; the handler substitutes `"http"` when it
    /// is absent.
    #[serde(default)]
    reason: Option<String>,
}
1405
1406async fn forget_handler(
1407    TenantExtractor(tenant): TenantExtractor,
1408    AuditPrincipal(principal): AuditPrincipal,
1409    Path(id): Path<String>,
1410    Query(q): Query<ForgetQuery>,
1411) -> Result<StatusCode, ApiError> {
1412    let mid = MemoryId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
1413    let reason = q.reason.unwrap_or_else(|| "http".into());
1414    tenant
1415        .write()
1416        .forget_as(principal, mid, reason)
1417        .await
1418        .map_err(ApiError::from)?;
1419    Ok(StatusCode::NO_CONTENT)
1420}
1421
1422async fn consolidate_handler(
1423    TenantExtractor(tenant): TenantExtractor,
1424    AuditPrincipal(principal): AuditPrincipal,
1425    body: axum::body::Bytes,
1426) -> Result<Json<solo_storage::ConsolidationReport>, ApiError> {
1427    // Empty body = default scope (unbounded window). We parse via
1428    // `Bytes` rather than `Option<Json<T>>` because axum's `Json`
1429    // extractor 400s on an empty body when Content-Type is JSON
1430    // (it can't deserialize zero bytes as `T`), and the `Option`
1431    // wrapper doesn't reliably degrade that failure to `None`.
1432    let scope = if body.is_empty() {
1433        solo_storage::ConsolidationScope::default()
1434    } else {
1435        serde_json::from_slice(&body)
1436            .map_err(|e| ApiError::bad_request(format!("invalid JSON: {e}")))?
1437    };
1438    let report = tenant
1439        .write()
1440        .consolidate_as(principal, scope)
1441        .await
1442        .map_err(ApiError::from)?;
1443    Ok(Json(report))
1444}
1445
/// JSON request body accepted by `backup_handler`.
#[derive(Debug, Deserialize)]
struct BackupBody {
    /// Server-side absolute path where the backup file should be
    /// written. Must be writable by the Solo process. Refuses to
    /// overwrite an existing file unless `force = true`.
    to: String,
    /// When `true`, an existing destination file is removed before the
    /// backup runs. Defaults to `false`.
    #[serde(default)]
    force: bool,
}
1455
/// JSON response produced by `backup_handler`.
#[derive(Debug, Serialize)]
struct BackupResponse {
    /// Display form of the destination path the backup was written to.
    path: String,
    /// Time spent in the writer's backup call, in milliseconds.
    elapsed_ms: u64,
}
1461
1462async fn backup_handler(
1463    TenantExtractor(tenant): TenantExtractor,
1464    Json(body): Json<BackupBody>,
1465) -> Result<Json<BackupResponse>, ApiError> {
1466    use std::path::PathBuf;
1467
1468    let dest = PathBuf::from(&body.to);
1469    if dest.as_os_str().is_empty() {
1470        return Err(ApiError::bad_request("`to` must not be empty"));
1471    }
1472    // CRITICAL ORDER: same-file refusal MUST come BEFORE `remove_file`.
1473    // The tenant's source DB path comes from the resolved TenantHandle.
1474    if solo_storage::paths_refer_to_same_file(tenant.db_path(), &dest) {
1475        return Err(ApiError::bad_request(format!(
1476            "destination {} is the same file as the source database; \
1477             refusing to run (would corrupt the live database)",
1478            dest.display()
1479        )));
1480    }
1481    if dest.exists() {
1482        if !body.force {
1483            return Err(ApiError::bad_request(format!(
1484                "destination {} exists; pass force=true to overwrite",
1485                dest.display()
1486            )));
1487        }
1488        std::fs::remove_file(&dest).map_err(|e| {
1489            ApiError::internal(format!(
1490                "remove existing destination {}: {e}",
1491                dest.display()
1492            ))
1493        })?;
1494    }
1495    if let Some(parent) = dest.parent() {
1496        if !parent.as_os_str().is_empty() && !parent.is_dir() {
1497            return Err(ApiError::bad_request(format!(
1498                "destination parent directory {} does not exist",
1499                parent.display()
1500            )));
1501        }
1502    }
1503
1504    let started = std::time::Instant::now();
1505    tenant.write().backup(dest.clone()).await.map_err(ApiError::from)?;
1506    let elapsed_ms = started.elapsed().as_millis() as u64;
1507
1508    Ok(Json(BackupResponse {
1509        path: dest.display().to_string(),
1510        elapsed_ms,
1511    }))
1512}
1513
1514// ---------------------------------------------------------------------------
1515// Graph expand (v0.9.x — first /v1/graph/* endpoint for solo-web)
1516// ---------------------------------------------------------------------------
1517//
1518// `GET /v1/graph/expand?node_id=...&kind=...&limit=N` — read-only neighbor
1519// drill off any node. Supports four edge kinds:
1520//   * `cluster_member` — episodes ↔ clusters via `cluster_episodes`.
1521//   * `document_chunk` — documents ↔ chunks via `document_chunks.doc_id`.
1522//   * `triple`         — episodes ↔ entities via `triples` (subject_id /
1523//     object_id / source_episode_id added in migration 0007).
1524//   * `semantic`       — HNSW top-K similar episodes (re-embeds the source
1525//     episode's content via the tenant embedder, then calls the same
1526//     pipeline as `/memory/search`; cheaper than a separate embeddings-
1527//     table fetch path and reuses one well-tested code path).
1528//
1529// **Node-id prefix convention** (locked in this PR; the future
1530// `/v1/graph/nodes` + `/v1/graph/inspect/:id` endpoints will use the
1531// same scheme):
1532//   * `ep:<memory_id>`     — episode (memory_id = UUID v7)
1533//   * `doc:<doc_id>`       — document (doc_id   = UUID v7)
1534//   * `chunk:<chunk_id>`   — chunk    (chunk_id = UUID v7)
1535//   * `cl:<cluster_id>`    — cluster
1536//   * `ent:<value>`        — entity (synthetic — minted from a triple's
1537//     subject_id / object_id; value is the raw string verbatim, no
1538//     URL-encoding — `:` and other punctuation appear in real entity
1539//     ids in the wild).
1540//
1541// Entity nodes are synthetic: there's no `entities` table. They're derived
1542// on-the-fly from triples and only exist in the wire format. Two entity
1543// nodes with the same `ent:<value>` are the same node.
1544//
1545// **Read-only**: no audit emit (lesson #30 — graph expand is a derived view
1546// over already-audited primitives; the explicit-query audit events from
1547// `memory.recall` / `memory.inspect` / `memory.facts_about` cover the
1548// underlying reads).
1549//
1550// Tests live inline in `handler_tests` below.
1551
/// Neighbor count used by `graph_expand_handler` when the request carries
/// no `limit`.
const GRAPH_EXPAND_DEFAULT_LIMIT: u32 = 25;
/// Upper bound `graph_expand_handler` clamps the requested `limit` to.
const GRAPH_EXPAND_MAX_LIMIT: u32 = 100;
1554
/// Edge-kind discriminator. Drives which expansion path runs and what edge
/// kind appears in the response.
///
/// Deserialized from the `kind` query parameter using snake_case names
/// (`cluster_member`, `document_chunk`, `triple`, `semantic`).
#[derive(Debug, Clone, Copy, Deserialize)]
#[serde(rename_all = "snake_case")]
enum GraphExpandKind {
    /// Episodes ↔ clusters.
    ClusterMember,
    /// Documents ↔ chunks.
    DocumentChunk,
    /// Episodes ↔ entities via triples.
    Triple,
    /// Top-K similar episodes.
    Semantic,
}
1565
/// Query-string parameters accepted by `graph_expand_handler`.
#[derive(Debug, Deserialize)]
struct GraphExpandQuery {
    /// Source node in `<prefix>:<value>` form; decoded by `parse_node_id`.
    node_id: String,
    /// Which edge kind to expand along.
    kind: GraphExpandKind,
    /// Optional neighbor cap. The handler falls back to
    /// `GRAPH_EXPAND_DEFAULT_LIMIT` and clamps to
    /// `1..=GRAPH_EXPAND_MAX_LIMIT`.
    #[serde(default)]
    limit: Option<u32>,
}
1573
/// Kind of a graph node, as determined by the prefix of its `node_id`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NodeKind {
    Episode,
    Document,
    Chunk,
    Cluster,
    Entity,
}

impl NodeKind {
    /// Name of this kind as it appears in the `kind` field of a serialized
    /// graph node.
    fn as_wire_str(self) -> &'static str {
        use NodeKind::{Chunk, Cluster, Document, Entity, Episode};

        // Exhaustive on purpose: a new variant must pick a wire name.
        let wire_name: &'static str = match self {
            Episode => "episode",
            Document => "document",
            Chunk => "chunk",
            Cluster => "cluster",
            Entity => "entity",
        };
        wire_name
    }
}
1595
1596/// Decompose `<prefix>:<value>` into (kind, raw value). Returns 400 on
1597/// unknown prefix / empty value / no `:`.
1598fn parse_node_id(raw: &str) -> Result<(NodeKind, &str), ApiError> {
1599    let (prefix, value) = raw.split_once(':').ok_or_else(|| {
1600        ApiError::bad_request(format!(
1601            "node_id must be `<prefix>:<value>` (one of ep:/doc:/chunk:/cl:/ent:); got {raw:?}"
1602        ))
1603    })?;
1604    if value.is_empty() {
1605        return Err(ApiError::bad_request(format!(
1606            "node_id value is empty after prefix: {raw:?}"
1607        )));
1608    }
1609    let kind = match prefix {
1610        "ep" => NodeKind::Episode,
1611        "doc" => NodeKind::Document,
1612        "chunk" => NodeKind::Chunk,
1613        "cl" => NodeKind::Cluster,
1614        "ent" => NodeKind::Entity,
1615        other => {
1616            return Err(ApiError::bad_request(format!(
1617                "unknown node_id prefix {other:?}; expected one of ep:/doc:/chunk:/cl:/ent:"
1618            )));
1619        }
1620    };
1621    Ok((kind, value))
1622}
1623
/// One node in the graph-expand response. Mirrors solo-web's `GraphNode`
/// TS interface (see `solo-web/src/api/types.ts`).
#[derive(Debug, Serialize)]
struct GraphNode {
    /// Prefixed node id (`ep:…`, `doc:…`, `chunk:…`, `cl:…`, `ent:…`).
    id: String,
    /// Wire name of the node kind, from `NodeKind::as_wire_str`.
    kind: &'static str,
    /// Short display text for the node.
    label: String,
    /// Timestamp in milliseconds when the node has one; left out of the
    /// JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    ts_ms: Option<i64>,
    /// Tenant the node belongs to.
    tenant_id: String,
    /// Longer text excerpt when available; left out of the JSON when
    /// `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    preview: Option<String>,
}
1637
/// One edge. Mirrors `GraphEdge` in solo-web TS types. `id` is a composite
/// `${source}--${kind}--${target}` so the renderer can dedupe.
#[derive(Debug, Serialize)]
struct GraphEdge {
    /// Composite id built by `edge_id`.
    id: String,
    /// Prefixed id of the node the expansion started from.
    source: String,
    /// Prefixed id of the neighbor node.
    target: String,
    /// Edge kind as a wire string (for example `"cluster_member"`).
    kind: &'static str,
    /// Optional predicate text; left out of the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    predicate: Option<String>,
    /// Optional edge weight; left out of the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    weight: Option<f32>,
}
1651
/// JSON response of the graph-expand endpoint: the neighbor nodes that
/// were found plus the edges connecting the source node to them.
#[derive(Debug, Serialize)]
struct GraphExpandResponse {
    /// Neighbor nodes.
    nodes: Vec<GraphNode>,
    /// Edges from the source node to the neighbors.
    edges: Vec<GraphEdge>,
}
1657
/// Builds the composite edge id `<source>--<kind>--<target>`.
fn edge_id(source: &str, kind: &str, target: &str) -> String {
    [source, kind, target].join("--")
}
1661
/// Episode summary needed to mint a `GraphNode` from an episode row.
#[derive(Debug)]
struct ExpandedEpisode {
    /// Episode id without the `ep:` prefix.
    memory_id: String,
    /// Episode timestamp in milliseconds.
    ts_ms: i64,
    /// Full episode text; labels and previews are cut from it.
    content: String,
}
1669
/// Document summary needed to mint a `GraphNode` from a document row.
#[derive(Debug)]
struct ExpandedDocument {
    /// Document id without the `doc:` prefix.
    doc_id: String,
    /// Optional title; first choice for the node label.
    title: Option<String>,
    /// Optional source; second choice for the label and also the node's
    /// preview.
    source: Option<String>,
    /// Ingestion timestamp in milliseconds.
    ingested_at_ms: i64,
}
1678
/// Chunk summary needed to mint a `GraphNode` from a chunk row.
#[derive(Debug)]
struct ExpandedChunk {
    /// Chunk id without the `chunk:` prefix.
    chunk_id: String,
    /// Chunk index, shown in the node label as `chunk #<index>`.
    chunk_index: i64,
    /// Full chunk text; labels and previews are cut from it.
    content: String,
}
1686
/// Shortens `s` to at most `max` characters (Unicode scalar values, not
/// bytes).
///
/// A string that already fits is returned unchanged. A longer string is
/// cut to its first `max - 1` characters followed by `…`, so the result is
/// exactly `max` characters long. A budget of `0` yields an empty string;
/// previously that case underflowed `max - 1` for any non-empty input.
fn truncate_preview(s: &str, max: usize) -> String {
    if max == 0 {
        return String::new();
    }

    // One pass over the char boundaries: `cut` is the byte offset where
    // character number `max - 1` starts, and `overflow` is `Some` only
    // when a character exists at index `max`, i.e. `s` is too long.
    let mut boundaries = s.char_indices().map(|(offset, _)| offset).skip(max - 1);
    let cut = boundaries.next();
    let overflow = boundaries.next();

    match (cut, overflow) {
        (Some(cut), Some(_)) => {
            let mut out = String::with_capacity(cut + '…'.len_utf8());
            out.push_str(&s[..cut]);
            out.push('…');
            out
        }
        _ => s.to_string(),
    }
}
1695
/// First-line label cap. Keeps payloads tight for the graph renderer
/// (labels are headings, not full content).
const GRAPH_LABEL_CHARS: usize = 80;
/// Cap, in characters, applied through `truncate_preview` to the `preview`
/// text of episode, chunk and cluster nodes.
const GRAPH_PREVIEW_CHARS: usize = 200;
1700
1701fn episode_label(content: &str) -> String {
1702    let first_line = content.lines().next().unwrap_or(content);
1703    truncate_preview(first_line, GRAPH_LABEL_CHARS)
1704}
1705
1706fn graph_node_for_episode(tenant_id: &str, ep: &ExpandedEpisode) -> GraphNode {
1707    GraphNode {
1708        id: format!("ep:{}", ep.memory_id),
1709        kind: NodeKind::Episode.as_wire_str(),
1710        label: episode_label(&ep.content),
1711        ts_ms: Some(ep.ts_ms),
1712        tenant_id: tenant_id.to_string(),
1713        preview: Some(truncate_preview(&ep.content, GRAPH_PREVIEW_CHARS)),
1714    }
1715}
1716
1717fn graph_node_for_document(tenant_id: &str, d: &ExpandedDocument) -> GraphNode {
1718    let label = d
1719        .title
1720        .clone()
1721        .or_else(|| d.source.clone())
1722        .unwrap_or_else(|| d.doc_id.clone());
1723    GraphNode {
1724        id: format!("doc:{}", d.doc_id),
1725        kind: NodeKind::Document.as_wire_str(),
1726        label: truncate_preview(&label, GRAPH_LABEL_CHARS),
1727        ts_ms: Some(d.ingested_at_ms),
1728        tenant_id: tenant_id.to_string(),
1729        preview: d.source.clone(),
1730    }
1731}
1732
1733fn graph_node_for_chunk(tenant_id: &str, c: &ExpandedChunk) -> GraphNode {
1734    GraphNode {
1735        id: format!("chunk:{}", c.chunk_id),
1736        kind: NodeKind::Chunk.as_wire_str(),
1737        label: format!("chunk #{}: {}", c.chunk_index, episode_label(&c.content)),
1738        ts_ms: None,
1739        tenant_id: tenant_id.to_string(),
1740        preview: Some(truncate_preview(&c.content, GRAPH_PREVIEW_CHARS)),
1741    }
1742}
1743
1744fn graph_node_for_cluster(
1745    tenant_id: &str,
1746    cluster_id: &str,
1747    abstraction: Option<&str>,
1748    created_at_ms: i64,
1749) -> GraphNode {
1750    let label = abstraction
1751        .map(|a| truncate_preview(a, GRAPH_LABEL_CHARS))
1752        .unwrap_or_else(|| format!("cluster {cluster_id}"));
1753    GraphNode {
1754        id: format!("cl:{cluster_id}"),
1755        kind: NodeKind::Cluster.as_wire_str(),
1756        label,
1757        ts_ms: Some(created_at_ms),
1758        tenant_id: tenant_id.to_string(),
1759        preview: abstraction.map(|a| truncate_preview(a, GRAPH_PREVIEW_CHARS)),
1760    }
1761}
1762
1763fn graph_node_for_entity(tenant_id: &str, value: &str) -> GraphNode {
1764    GraphNode {
1765        id: format!("ent:{value}"),
1766        kind: NodeKind::Entity.as_wire_str(),
1767        label: truncate_preview(value, GRAPH_LABEL_CHARS),
1768        ts_ms: None,
1769        tenant_id: tenant_id.to_string(),
1770        preview: None,
1771    }
1772}
1773
1774/// `GET /v1/graph/expand`. See module-level comments for the contract.
1775async fn graph_expand_handler(
1776    TenantExtractor(tenant): TenantExtractor,
1777    Query(q): Query<GraphExpandQuery>,
1778) -> Result<Json<GraphExpandResponse>, ApiError> {
1779    // Silent clamp at GRAPH_EXPAND_MAX_LIMIT — matches the rest of
1780    // solo-query's read pipelines (recall, themes, etc.). Documented in
1781    // the OpenAPI spec.
1782    let limit = q.limit.unwrap_or(GRAPH_EXPAND_DEFAULT_LIMIT);
1783    let limit = limit.clamp(1, GRAPH_EXPAND_MAX_LIMIT) as i64;
1784
1785    let (node_kind, value) = parse_node_id(&q.node_id)?;
1786    let value = value.to_string();
1787    let node_id_full = q.node_id.clone();
1788    let tenant_id_str = tenant.tenant_id().to_string();
1789
1790    match q.kind {
1791        GraphExpandKind::ClusterMember => {
1792            expand_cluster_member(&tenant, &tenant_id_str, node_kind, &value, &node_id_full, limit)
1793                .await
1794        }
1795        GraphExpandKind::DocumentChunk => {
1796            expand_document_chunk(&tenant, &tenant_id_str, node_kind, &value, &node_id_full, limit)
1797                .await
1798        }
1799        GraphExpandKind::Triple => {
1800            expand_triple(&tenant, &tenant_id_str, node_kind, &value, &node_id_full, limit).await
1801        }
1802        GraphExpandKind::Semantic => {
1803            expand_semantic(&tenant, &tenant_id_str, node_kind, &value, &node_id_full, limit).await
1804        }
1805    }
1806    .map(Json)
1807}
1808
1809// ---- cluster_member ----
1810
1811async fn expand_cluster_member(
1812    tenant: &TenantHandle,
1813    tenant_id: &str,
1814    node_kind: NodeKind,
1815    value: &str,
1816    node_id_full: &str,
1817    limit: i64,
1818) -> Result<GraphExpandResponse, ApiError> {
1819    match node_kind {
1820        NodeKind::Episode => expand_cluster_member_from_episode(
1821            tenant,
1822            tenant_id,
1823            value.to_string(),
1824            node_id_full.to_string(),
1825            limit,
1826        )
1827        .await,
1828        NodeKind::Cluster => expand_cluster_member_from_cluster(
1829            tenant,
1830            tenant_id,
1831            value.to_string(),
1832            node_id_full.to_string(),
1833            limit,
1834        )
1835        .await,
1836        _ => Err(ApiError::bad_request(format!(
1837            "kind=cluster_member only valid for episode or cluster source nodes; got {}",
1838            node_kind.as_wire_str()
1839        ))),
1840    }
1841}
1842
/// Expands an episode node to the clusters it belongs to.
///
/// Returns one `cl:<cluster_id>` node and one `cluster_member` edge per
/// membership row, newest cluster first, capped at `limit`. When nothing
/// is found, `ensure_episode_exists` decides the outcome: if it succeeds
/// the response is two empty arrays.
///
/// `memory_id` is the raw episode id and `node_id_full` its prefixed form,
/// used as the `source` of every edge.
// NOTE(review): `ensure_episode_exists` is defined elsewhere; presumably it
// returns a not-found error for a missing episode — confirm.
// NOTE(review): the LEFT JOIN would yield one row per abstraction if a
// cluster can have several `semantic_abstractions` rows — confirm the
// schema guarantees at most one.
async fn expand_cluster_member_from_episode(
    tenant: &TenantHandle,
    tenant_id: &str,
    memory_id: String,
    node_id_full: String,
    limit: i64,
) -> Result<GraphExpandResponse, ApiError> {
    // The closure below takes ownership of `memory_id`; keep a copy for the
    // existence probe that may run afterwards.
    let memory_id_for_err = memory_id.clone();
    let rows: Vec<(String, Option<String>, i64)> = tenant
        .read()
        .interact(move |conn| {
            // First confirm the source episode exists in this tenant.
            let exists: i64 = conn.query_row(
                "SELECT COUNT(*) FROM episodes WHERE memory_id = ?1",
                rusqlite::params![&memory_id],
                |r| r.get(0),
            )?;
            if exists == 0 {
                return Ok(Vec::new());
            }
            let mut stmt = conn.prepare(
                "SELECT c.cluster_id, sa.content, c.created_at_ms
                   FROM cluster_episodes ce
                   JOIN clusters c ON c.cluster_id = ce.cluster_id
                   LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
                  WHERE ce.memory_id = ?1
                  ORDER BY c.created_at_ms DESC
                  LIMIT ?2",
            )?;
            let mapped = stmt
                .query_map(rusqlite::params![&memory_id, limit], |r| {
                    Ok((
                        r.get::<_, String>(0)?,
                        r.get::<_, Option<String>>(1)?,
                        r.get::<_, i64>(2)?,
                    ))
                })?
                .collect::<rusqlite::Result<Vec<_>>>()?;
            // Collected into a local first so the statement borrow ends
            // before the closure returns.
            Ok::<_, rusqlite::Error>(mapped)
        })
        .await
        .map_err(ApiError::from)?;

    // An empty result is ambiguous: either the episode is missing (the
    // guard above returned early) or it exists but belongs to no cluster.
    // The separate existence probe tells the two apart; only if it passes
    // is the empty 200 response returned.
    if rows.is_empty() {
        ensure_episode_exists(tenant, &memory_id_for_err, &node_id_full).await?;
        return Ok(GraphExpandResponse {
            nodes: Vec::new(),
            edges: Vec::new(),
        });
    }

    let mut nodes = Vec::with_capacity(rows.len());
    let mut edges = Vec::with_capacity(rows.len());
    for (cluster_id, abstraction, created_at_ms) in rows {
        let target_id = format!("cl:{cluster_id}");
        edges.push(GraphEdge {
            id: edge_id(&node_id_full, "cluster_member", &target_id),
            source: node_id_full.clone(),
            target: target_id,
            kind: "cluster_member",
            predicate: None,
            weight: None,
        });
        nodes.push(graph_node_for_cluster(
            tenant_id,
            &cluster_id,
            abstraction.as_deref(),
            created_at_ms,
        ));
    }
    Ok(GraphExpandResponse { nodes, edges })
}
1927
1928async fn expand_cluster_member_from_cluster(
1929    tenant: &TenantHandle,
1930    tenant_id: &str,
1931    cluster_id: String,
1932    node_id_full: String,
1933    limit: i64,
1934) -> Result<GraphExpandResponse, ApiError> {
1935    let cluster_id_for_err = cluster_id.clone();
1936    let rows: Vec<ExpandedEpisode> = tenant
1937        .read()
1938        .interact(move |conn| {
1939            let exists: i64 = conn.query_row(
1940                "SELECT COUNT(*) FROM clusters WHERE cluster_id = ?1",
1941                rusqlite::params![&cluster_id],
1942                |r| r.get(0),
1943            )?;
1944            if exists == 0 {
1945                return Ok(Vec::new());
1946            }
1947            let mut stmt = conn.prepare(
1948                "SELECT e.memory_id, e.ts_ms, e.content
1949                   FROM cluster_episodes ce
1950                   JOIN episodes e ON e.memory_id = ce.memory_id
1951                  WHERE ce.cluster_id = ?1
1952                    AND e.status = 'active'
1953                  ORDER BY e.ts_ms DESC
1954                  LIMIT ?2",
1955            )?;
1956            let mapped = stmt
1957                .query_map(rusqlite::params![&cluster_id, limit], |r| {
1958                    Ok(ExpandedEpisode {
1959                        memory_id: r.get(0)?,
1960                        ts_ms: r.get(1)?,
1961                        content: r.get(2)?,
1962                    })
1963                })?
1964                .collect::<rusqlite::Result<Vec<_>>>()?;
1965            Ok::<_, rusqlite::Error>(mapped)
1966        })
1967        .await
1968        .map_err(ApiError::from)?;
1969
1970    if rows.is_empty() {
1971        ensure_cluster_exists(tenant, &cluster_id_for_err, &node_id_full).await?;
1972        return Ok(GraphExpandResponse {
1973            nodes: Vec::new(),
1974            edges: Vec::new(),
1975        });
1976    }
1977
1978    let mut nodes = Vec::with_capacity(rows.len());
1979    let mut edges = Vec::with_capacity(rows.len());
1980    for ep in rows {
1981        let target_id = format!("ep:{}", ep.memory_id);
1982        edges.push(GraphEdge {
1983            id: edge_id(&node_id_full, "cluster_member", &target_id),
1984            source: node_id_full.clone(),
1985            target: target_id,
1986            kind: "cluster_member",
1987            predicate: None,
1988            weight: None,
1989        });
1990        nodes.push(graph_node_for_episode(tenant_id, &ep));
1991    }
1992    Ok(GraphExpandResponse { nodes, edges })
1993}
1994
1995// ---- document_chunk ----
1996
1997async fn expand_document_chunk(
1998    tenant: &TenantHandle,
1999    tenant_id: &str,
2000    node_kind: NodeKind,
2001    value: &str,
2002    node_id_full: &str,
2003    limit: i64,
2004) -> Result<GraphExpandResponse, ApiError> {
2005    match node_kind {
2006        NodeKind::Document => expand_document_chunk_from_document(
2007            tenant,
2008            tenant_id,
2009            value.to_string(),
2010            node_id_full.to_string(),
2011            limit,
2012        )
2013        .await,
2014        NodeKind::Chunk => expand_document_chunk_from_chunk(
2015            tenant,
2016            tenant_id,
2017            value.to_string(),
2018            node_id_full.to_string(),
2019        )
2020        .await,
2021        _ => Err(ApiError::bad_request(format!(
2022            "kind=document_chunk only valid for document or chunk source nodes; got {}",
2023            node_kind.as_wire_str()
2024        ))),
2025    }
2026}
2027
2028async fn expand_document_chunk_from_document(
2029    tenant: &TenantHandle,
2030    tenant_id: &str,
2031    doc_id: String,
2032    node_id_full: String,
2033    limit: i64,
2034) -> Result<GraphExpandResponse, ApiError> {
2035    let doc_id_for_err = doc_id.clone();
2036    let rows: Vec<ExpandedChunk> = tenant
2037        .read()
2038        .interact(move |conn| {
2039            let exists: i64 = conn.query_row(
2040                "SELECT COUNT(*) FROM documents WHERE doc_id = ?1",
2041                rusqlite::params![&doc_id],
2042                |r| r.get(0),
2043            )?;
2044            if exists == 0 {
2045                return Ok(Vec::new());
2046            }
2047            let mut stmt = conn.prepare(
2048                "SELECT chunk_id, chunk_index, content
2049                   FROM document_chunks
2050                  WHERE doc_id = ?1
2051                  ORDER BY chunk_index ASC
2052                  LIMIT ?2",
2053            )?;
2054            let mapped = stmt
2055                .query_map(rusqlite::params![&doc_id, limit], |r| {
2056                    Ok(ExpandedChunk {
2057                        chunk_id: r.get(0)?,
2058                        chunk_index: r.get(1)?,
2059                        content: r.get(2)?,
2060                    })
2061                })?
2062                .collect::<rusqlite::Result<Vec<_>>>()?;
2063            Ok::<_, rusqlite::Error>(mapped)
2064        })
2065        .await
2066        .map_err(ApiError::from)?;
2067
2068    if rows.is_empty() {
2069        ensure_document_exists(tenant, &doc_id_for_err, &node_id_full).await?;
2070        return Ok(GraphExpandResponse {
2071            nodes: Vec::new(),
2072            edges: Vec::new(),
2073        });
2074    }
2075
2076    let mut nodes = Vec::with_capacity(rows.len());
2077    let mut edges = Vec::with_capacity(rows.len());
2078    for c in rows {
2079        let target_id = format!("chunk:{}", c.chunk_id);
2080        edges.push(GraphEdge {
2081            id: edge_id(&node_id_full, "document_chunk", &target_id),
2082            source: node_id_full.clone(),
2083            target: target_id,
2084            kind: "document_chunk",
2085            predicate: None,
2086            weight: None,
2087        });
2088        nodes.push(graph_node_for_chunk(tenant_id, &c));
2089    }
2090    Ok(GraphExpandResponse { nodes, edges })
2091}
2092
2093async fn expand_document_chunk_from_chunk(
2094    tenant: &TenantHandle,
2095    tenant_id: &str,
2096    chunk_id: String,
2097    node_id_full: String,
2098) -> Result<GraphExpandResponse, ApiError> {
2099    let chunk_id_for_err = chunk_id.clone();
2100    let row: Option<ExpandedDocument> = tenant
2101        .read()
2102        .interact(move |conn| {
2103            conn.query_row(
2104                "SELECT d.doc_id, d.title, d.source, d.ingested_at_ms
2105                   FROM document_chunks c
2106                   JOIN documents d ON d.doc_id = c.doc_id
2107                  WHERE c.chunk_id = ?1",
2108                rusqlite::params![&chunk_id],
2109                |r| {
2110                    Ok(ExpandedDocument {
2111                        doc_id: r.get(0)?,
2112                        title: r.get(1)?,
2113                        source: r.get(2)?,
2114                        ingested_at_ms: r.get(3)?,
2115                    })
2116                },
2117            )
2118            .map(Some)
2119            .or_else(|e| match e {
2120                rusqlite::Error::QueryReturnedNoRows => Ok(None),
2121                other => Err(other),
2122            })
2123        })
2124        .await
2125        .map_err(ApiError::from)?;
2126
2127    let d = row.ok_or_else(|| {
2128        ApiError::not_found(format!(
2129            "node_id {node_id_full:?} (chunk_id {chunk_id_for_err}) not found in current tenant"
2130        ))
2131    })?;
2132    let target_id = format!("doc:{}", d.doc_id);
2133    let edge = GraphEdge {
2134        id: edge_id(&node_id_full, "document_chunk", &target_id),
2135        source: node_id_full.clone(),
2136        target: target_id,
2137        kind: "document_chunk",
2138        predicate: None,
2139        weight: None,
2140    };
2141    let node = graph_node_for_document(tenant_id, &d);
2142    Ok(GraphExpandResponse {
2143        nodes: vec![node],
2144        edges: vec![edge],
2145    })
2146}
2147
2148// ---- triple ----
2149
2150async fn expand_triple(
2151    tenant: &TenantHandle,
2152    tenant_id: &str,
2153    node_kind: NodeKind,
2154    value: &str,
2155    node_id_full: &str,
2156    limit: i64,
2157) -> Result<GraphExpandResponse, ApiError> {
2158    match node_kind {
2159        NodeKind::Episode => expand_triple_from_episode(
2160            tenant,
2161            tenant_id,
2162            value.to_string(),
2163            node_id_full.to_string(),
2164            limit,
2165        )
2166        .await,
2167        NodeKind::Entity => expand_triple_from_entity(
2168            tenant,
2169            tenant_id,
2170            value.to_string(),
2171            node_id_full.to_string(),
2172            limit,
2173        )
2174        .await,
2175        _ => Err(ApiError::bad_request(format!(
2176            "kind=triple only valid for episode or entity source nodes; got {}",
2177            node_kind.as_wire_str()
2178        ))),
2179    }
2180}
2181
/// One row read from the `triples` table by `expand_triple_from_episode`.
#[derive(Debug)]
struct TripleRow {
    /// `triples.subject_id`; rendered as the `ent:`-prefixed edge source.
    subject_id: String,
    /// `triples.predicate`; copied onto the emitted edge.
    predicate: String,
    /// `triples.object_id`; rendered as the `ent:`-prefixed edge target.
    object_id: String,
    /// `triples.confidence`; becomes the emitted edge's weight.
    confidence: f32,
}
2189
2190async fn expand_triple_from_episode(
2191    tenant: &TenantHandle,
2192    tenant_id: &str,
2193    memory_id: String,
2194    node_id_full: String,
2195    limit: i64,
2196) -> Result<GraphExpandResponse, ApiError> {
2197    let memory_id_for_err = memory_id.clone();
2198    let rows: Vec<TripleRow> = tenant
2199        .read()
2200        .interact(move |conn| {
2201            // Episode rowid lookup (triples FK is INTEGER rowid, not memory_id).
2202            let rowid_opt: Option<i64> = conn
2203                .query_row(
2204                    "SELECT rowid FROM episodes WHERE memory_id = ?1",
2205                    rusqlite::params![&memory_id],
2206                    |r| r.get(0),
2207                )
2208                .map(Some)
2209                .or_else(|e| match e {
2210                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
2211                    other => Err(other),
2212                })?;
2213            let Some(rowid) = rowid_opt else {
2214                return Ok(Vec::new());
2215            };
2216            let mut stmt = conn.prepare(
2217                "SELECT subject_id, predicate, object_id, confidence
2218                   FROM triples
2219                  WHERE source_episode_id = ?1
2220                    AND status = 'active'
2221                  ORDER BY valid_from_ms DESC
2222                  LIMIT ?2",
2223            )?;
2224            let mapped = stmt
2225                .query_map(rusqlite::params![rowid, limit], |r| {
2226                    Ok(TripleRow {
2227                        subject_id: r.get(0)?,
2228                        predicate: r.get(1)?,
2229                        object_id: r.get(2)?,
2230                        confidence: r.get(3)?,
2231                    })
2232                })?
2233                .collect::<rusqlite::Result<Vec<_>>>()?;
2234            Ok::<_, rusqlite::Error>(mapped)
2235        })
2236        .await
2237        .map_err(ApiError::from)?;
2238
2239    if rows.is_empty() {
2240        ensure_episode_exists(tenant, &memory_id_for_err, &node_id_full).await?;
2241        return Ok(GraphExpandResponse {
2242            nodes: Vec::new(),
2243            edges: Vec::new(),
2244        });
2245    }
2246
2247    let mut nodes = Vec::new();
2248    let mut edges = Vec::new();
2249    let mut seen_entities: std::collections::HashSet<String> = Default::default();
2250    for t in rows {
2251        // Mint both endpoints as entity nodes. The source episode is
2252        // node_id_full; each triple becomes two edges (source→subj +
2253        // subj→obj) connected through the entity nodes, OR a single
2254        // edge labelled with the predicate from the source episode to
2255        // a representative entity. The TS schema treats `triple` as a
2256        // single edge with `predicate`; we emit one edge per triple:
2257        // source_episode → subject_entity (kind=triple, predicate=p),
2258        // plus one extra edge subject_entity → object_entity (also
2259        // kind=triple, same predicate) so a renderer can hop along the
2260        // SPO graph.
2261        let subj_id = format!("ent:{}", t.subject_id);
2262        let obj_id = format!("ent:{}", t.object_id);
2263        if seen_entities.insert(t.subject_id.clone()) {
2264            nodes.push(graph_node_for_entity(tenant_id, &t.subject_id));
2265        }
2266        if seen_entities.insert(t.object_id.clone()) {
2267            nodes.push(graph_node_for_entity(tenant_id, &t.object_id));
2268        }
2269        edges.push(GraphEdge {
2270            id: edge_id(&subj_id, "triple", &obj_id),
2271            source: subj_id,
2272            target: obj_id,
2273            kind: "triple",
2274            predicate: Some(t.predicate),
2275            weight: Some(t.confidence),
2276        });
2277    }
2278    Ok(GraphExpandResponse { nodes, edges })
2279}
2280
2281async fn expand_triple_from_entity(
2282    tenant: &TenantHandle,
2283    tenant_id: &str,
2284    entity_value: String,
2285    node_id_full: String,
2286    limit: i64,
2287) -> Result<GraphExpandResponse, ApiError> {
2288    // Entity nodes are synthetic — there's no existence check we can
2289    // run. "Unknown entity" naturally resolves to an empty result.
2290    let entity_q = entity_value.clone();
2291    let rows: Vec<ExpandedEpisode> = tenant
2292        .read()
2293        .interact(move |conn| {
2294            // Find episodes whose triples reference this entity on either
2295            // side. JOIN against episodes.rowid via triples.source_episode_id.
2296            let mut stmt = conn.prepare(
2297                "SELECT DISTINCT e.memory_id, e.ts_ms, e.content
2298                   FROM triples t
2299                   JOIN episodes e ON e.rowid = t.source_episode_id
2300                  WHERE (t.subject_id = ?1 OR t.object_id = ?1)
2301                    AND t.status = 'active'
2302                    AND t.source_episode_id IS NOT NULL
2303                    AND e.status = 'active'
2304                  ORDER BY e.ts_ms DESC
2305                  LIMIT ?2",
2306            )?;
2307            let mapped = stmt
2308                .query_map(rusqlite::params![&entity_q, limit], |r| {
2309                    Ok(ExpandedEpisode {
2310                        memory_id: r.get(0)?,
2311                        ts_ms: r.get(1)?,
2312                        content: r.get(2)?,
2313                    })
2314                })?
2315                .collect::<rusqlite::Result<Vec<_>>>()?;
2316            Ok::<_, rusqlite::Error>(mapped)
2317        })
2318        .await
2319        .map_err(ApiError::from)?;
2320
2321    // Empty result on entity expand is a valid 200 — the entity exists
2322    // only in the wire format; "no edges" is the right answer.
2323    let mut nodes = Vec::with_capacity(rows.len());
2324    let mut edges = Vec::with_capacity(rows.len());
2325    for ep in rows {
2326        let target_id = format!("ep:{}", ep.memory_id);
2327        edges.push(GraphEdge {
2328            id: edge_id(&node_id_full, "triple", &target_id),
2329            source: node_id_full.clone(),
2330            target: target_id,
2331            kind: "triple",
2332            predicate: None,
2333            weight: None,
2334        });
2335        nodes.push(graph_node_for_episode(tenant_id, &ep));
2336    }
2337    // Annotate _ to suppress unused (only used in match guard).
2338    let _ = entity_value;
2339    Ok(GraphExpandResponse { nodes, edges })
2340}
2341
2342// ---- semantic ----
2343
2344async fn expand_semantic(
2345    tenant: &TenantHandle,
2346    tenant_id: &str,
2347    node_kind: NodeKind,
2348    value: &str,
2349    node_id_full: &str,
2350    limit: i64,
2351) -> Result<GraphExpandResponse, ApiError> {
2352    if node_kind != NodeKind::Episode {
2353        return Err(ApiError::bad_request(format!(
2354            "kind=semantic only valid for episode source nodes; got {}",
2355            node_kind.as_wire_str()
2356        )));
2357    }
2358    let memory_id = value.to_string();
2359    let memory_id_q = memory_id.clone();
2360    // Fetch the source episode's content so we can re-embed it and call
2361    // the existing HNSW pipeline. Cheaper-than-extra-machinery: reuses
2362    // the well-tested `run_recall_inner` path that already filters
2363    // forgotten rows + decodes hnsw ids.
2364    let content: Option<String> = tenant
2365        .read()
2366        .interact(move |conn| {
2367            conn.query_row(
2368                "SELECT content FROM episodes WHERE memory_id = ?1 AND status = 'active'",
2369                rusqlite::params![&memory_id_q],
2370                |r| r.get::<_, String>(0),
2371            )
2372            .map(Some)
2373            .or_else(|e| match e {
2374                rusqlite::Error::QueryReturnedNoRows => Ok(None),
2375                other => Err(other),
2376            })
2377        })
2378        .await
2379        .map_err(ApiError::from)?;
2380
2381    let content = content.ok_or_else(|| {
2382        ApiError::not_found(format!(
2383            "node_id {node_id_full:?} (memory_id {memory_id}) not found in current tenant"
2384        ))
2385    })?;
2386
2387    // Pull one extra hit so we can drop self without losing user-requested
2388    // count. limit is already ≤ MAX_LIMIT; +1 stays within reason.
2389    let widened = (limit as usize).saturating_add(1).min(100);
2390    let result = solo_query::recall::run_recall_inner(
2391        tenant.embedder(),
2392        tenant.hnsw(),
2393        tenant.read(),
2394        &content,
2395        widened,
2396    )
2397    .await
2398    .map_err(ApiError::from)?;
2399
2400    let mut nodes = Vec::new();
2401    let mut edges = Vec::new();
2402    for hit in result.hits.into_iter() {
2403        if hit.memory_id == memory_id {
2404            // Skip self.
2405            continue;
2406        }
2407        if nodes.len() as i64 >= limit {
2408            break;
2409        }
2410        // The HNSW `cos_distance` is a distance (smaller = more similar).
2411        // Convert to a weight in [0, 1] (larger = more similar) for the
2412        // wire format: weight = (1 - distance).max(0).
2413        let weight = (1.0 - hit.cos_distance).max(0.0);
2414        let target_id = format!("ep:{}", hit.memory_id);
2415        edges.push(GraphEdge {
2416            id: edge_id(node_id_full, "semantic", &target_id),
2417            source: node_id_full.to_string(),
2418            target: target_id,
2419            kind: "semantic",
2420            predicate: None,
2421            weight: Some(weight),
2422        });
2423        nodes.push(GraphNode {
2424            id: format!("ep:{}", hit.memory_id),
2425            kind: NodeKind::Episode.as_wire_str(),
2426            label: episode_label(&hit.content),
2427            ts_ms: None,
2428            tenant_id: tenant_id.to_string(),
2429            preview: Some(truncate_preview(&hit.content, GRAPH_PREVIEW_CHARS)),
2430        });
2431    }
2432    Ok(GraphExpandResponse { nodes, edges })
2433}
2434
2435// ---- existence checks ----
2436
2437/// 404 if the memory_id has no row in this tenant's `episodes` table.
2438async fn ensure_episode_exists(
2439    tenant: &TenantHandle,
2440    memory_id: &str,
2441    node_id_full: &str,
2442) -> Result<(), ApiError> {
2443    let memory_id_q = memory_id.to_string();
2444    let exists: i64 = tenant
2445        .read()
2446        .interact(move |conn| {
2447            conn.query_row(
2448                "SELECT COUNT(*) FROM episodes WHERE memory_id = ?1",
2449                rusqlite::params![&memory_id_q],
2450                |r| r.get(0),
2451            )
2452        })
2453        .await
2454        .map_err(ApiError::from)?;
2455    if exists == 0 {
2456        return Err(ApiError::not_found(format!(
2457            "node_id {node_id_full:?} not found in current tenant"
2458        )));
2459    }
2460    Ok(())
2461}
2462
2463async fn ensure_cluster_exists(
2464    tenant: &TenantHandle,
2465    cluster_id: &str,
2466    node_id_full: &str,
2467) -> Result<(), ApiError> {
2468    let cluster_id_q = cluster_id.to_string();
2469    let exists: i64 = tenant
2470        .read()
2471        .interact(move |conn| {
2472            conn.query_row(
2473                "SELECT COUNT(*) FROM clusters WHERE cluster_id = ?1",
2474                rusqlite::params![&cluster_id_q],
2475                |r| r.get(0),
2476            )
2477        })
2478        .await
2479        .map_err(ApiError::from)?;
2480    if exists == 0 {
2481        return Err(ApiError::not_found(format!(
2482            "node_id {node_id_full:?} not found in current tenant"
2483        )));
2484    }
2485    Ok(())
2486}
2487
2488async fn ensure_document_exists(
2489    tenant: &TenantHandle,
2490    doc_id: &str,
2491    node_id_full: &str,
2492) -> Result<(), ApiError> {
2493    let doc_id_q = doc_id.to_string();
2494    let exists: i64 = tenant
2495        .read()
2496        .interact(move |conn| {
2497            conn.query_row(
2498                "SELECT COUNT(*) FROM documents WHERE doc_id = ?1",
2499                rusqlite::params![&doc_id_q],
2500                |r| r.get(0),
2501            )
2502        })
2503        .await
2504        .map_err(ApiError::from)?;
2505    if exists == 0 {
2506        return Err(ApiError::not_found(format!(
2507            "node_id {node_id_full:?} not found in current tenant"
2508        )));
2509    }
2510    Ok(())
2511}
2512
2513// ---------------------------------------------------------------------------
2514// Graph nodes + edges — paginated catalog reads (v0.10.0)
2515//
2516// `GET /v1/graph/nodes` and `GET /v1/graph/edges` are the bundle that
2517// powers solo-web's initial graph render. Both are read-only, both
2518// share the same tenant / auth / cursor scaffolding, both inherit the
2519// node-id prefix convention from `/v1/graph/expand` (ep:/doc:/chunk:/cl:/ent:).
2520//
2521// See `docs/dev-log/0114-graph-nodes-edges-impl.md` for the design
2522// notes (cursor format, entity scan strategy, semantic-edge rejection
2523// rationale, UNION pagination shape).
2524// ---------------------------------------------------------------------------
2525
// Page-size bounds for the graph catalog endpoints.
// NOTE(review): the handlers that apply these are outside this excerpt —
// presumably a missing `limit` falls back to the DEFAULT value and the
// MAX value is the upper bound; confirm how over-limit requests are
// treated (clamped vs. rejected).
const GRAPH_NODES_DEFAULT_LIMIT: u32 = 100;
const GRAPH_NODES_MAX_LIMIT: u32 = 1000;
const GRAPH_EDGES_DEFAULT_LIMIT: u32 = 200;
const GRAPH_EDGES_MAX_LIMIT: u32 = 2000;
// Cap on the entity scan; reaching it is signalled to clients through
// `ENTITY_CAP_HEADER`.
const GRAPH_ENTITY_CAP: usize = 200;

/// Header set when the entity scan hit `GRAPH_ENTITY_CAP` and lower-
/// frequency entities were dropped from the response. Clients can show
/// "entities truncated" UX without parsing the body.
const ENTITY_CAP_HEADER: &str = "x-solo-entity-cap-reached";
2536
/// Query-string parameters for the graph nodes listing. Every field is
/// optional; a missing parameter deserializes to `None`.
#[derive(Debug, Deserialize)]
struct GraphNodesQuery {
    /// Comma-separated kinds. Empty/missing = all five kinds. Repeated
    /// `?kind=` query params are NOT supported by axum's `Query<T>`
    /// extractor for `Option<String>` (it picks one) — comma-separated
    /// is documented + simpler. Values: episode|document|chunk|cluster|entity.
    #[serde(default)]
    kind: Option<String>,
    /// Lower time bound in milliseconds. Presumably forwarded to the
    /// per-kind fetchers, which apply it inclusively (`>=`) — confirm
    /// against the handler.
    #[serde(default)]
    since_ms: Option<i64>,
    /// Upper time bound in milliseconds. Presumably forwarded to the
    /// per-kind fetchers, which apply it inclusively (`<=`) — confirm
    /// against the handler.
    #[serde(default)]
    until_ms: Option<i64>,
    /// Requested page size. NOTE(review): defaulting and capping happen
    /// in the handler, presumably via `GRAPH_NODES_DEFAULT_LIMIT` /
    /// `GRAPH_NODES_MAX_LIMIT`.
    #[serde(default)]
    limit: Option<u32>,
    /// Opaque pagination cursor from a previous response's `next_cursor`
    /// (presumably a `NodesCursor` in `encode_cursor` form).
    #[serde(default)]
    cursor: Option<String>,
}
2554
/// Query-string parameters for the graph edges listing. Every field is
/// optional; a missing parameter deserializes to `None`.
#[derive(Debug, Deserialize)]
struct GraphEdgesQuery {
    /// Optional node id. NOTE(review): presumably restricts the listing
    /// to edges touching this node — the handler is outside this excerpt.
    #[serde(default)]
    node_id: Option<String>,
    /// Comma-separated. Default = all kinds EXCEPT semantic.
    /// Values: triple|document_chunk|cluster_member|semantic.
    #[serde(default)]
    r#type: Option<String>,
    /// Requested page size. NOTE(review): defaulting and capping happen
    /// in the handler, presumably via `GRAPH_EDGES_DEFAULT_LIMIT` /
    /// `GRAPH_EDGES_MAX_LIMIT`.
    #[serde(default)]
    limit: Option<u32>,
    /// Opaque pagination cursor from a previous response's `next_cursor`
    /// (presumably an `EdgesCursor` in `encode_cursor` form).
    #[serde(default)]
    cursor: Option<String>,
}
2568
/// Response body for the graph nodes listing: one page of nodes plus an
/// optional continuation cursor.
#[derive(Debug, Serialize)]
struct GraphNodesResponse {
    /// Nodes in this page.
    nodes: Vec<GraphNode>,
    /// Cursor for the following page. The field is left out of the
    /// serialized output entirely when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    next_cursor: Option<String>,
}
2575
/// Response body for the graph edges listing: one page of edges plus an
/// optional continuation cursor.
#[derive(Debug, Serialize)]
struct GraphEdgesResponse {
    /// Edges in this page.
    edges: Vec<GraphEdge>,
    /// Cursor for the following page. The field is left out of the
    /// serialized output entirely when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    next_cursor: Option<String>,
}
2582
2583/// Decode the `kind` filter from the query string. Returns the set of
2584/// kinds the caller wants (all five when filter absent / empty). 400 on
2585/// unknown kind.
2586fn parse_node_kind_filter(raw: Option<&str>) -> Result<Vec<NodeKind>, ApiError> {
2587    let raw = raw.unwrap_or("").trim();
2588    if raw.is_empty() {
2589        return Ok(vec![
2590            NodeKind::Episode,
2591            NodeKind::Document,
2592            NodeKind::Chunk,
2593            NodeKind::Cluster,
2594            NodeKind::Entity,
2595        ]);
2596    }
2597    let mut out = Vec::new();
2598    for token in raw.split(',') {
2599        let token = token.trim();
2600        if token.is_empty() {
2601            continue;
2602        }
2603        let kind = match token {
2604            "episode" => NodeKind::Episode,
2605            "document" => NodeKind::Document,
2606            "chunk" => NodeKind::Chunk,
2607            "cluster" => NodeKind::Cluster,
2608            "entity" => NodeKind::Entity,
2609            other => {
2610                return Err(ApiError::bad_request(format!(
2611                    "unknown node kind {other:?}; expected one of episode/document/chunk/cluster/entity"
2612                )));
2613            }
2614        };
2615        if !out.contains(&kind) {
2616            out.push(kind);
2617        }
2618    }
2619    if out.is_empty() {
2620        return Err(ApiError::bad_request(
2621            "kind filter is empty after parsing; either omit or list at least one kind",
2622        ));
2623    }
2624    Ok(out)
2625}
2626
/// Edge-kind discriminator on `/v1/graph/edges`.
///
/// The explicit discriminants double as the pagination order returned
/// by [`EdgeKind::order_idx`]; keep them stable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum EdgeKind {
    Triple = 0,
    DocumentChunk = 1,
    ClusterMember = 2,
}

impl EdgeKind {
    /// Position of this kind in the pagination order; lower kinds are
    /// listed first.
    fn order_idx(self) -> u8 {
        self as u8
    }
}
2645
2646fn parse_edge_kind_filter(raw: Option<&str>) -> Result<Vec<EdgeKind>, ApiError> {
2647    let raw = raw.unwrap_or("").trim();
2648    if raw.is_empty() {
2649        // Default = all three concrete kinds; semantic is opt-in via
2650        // /v1/graph/neighbors/:id (per scoping doc §3 Decision B).
2651        return Ok(vec![
2652            EdgeKind::Triple,
2653            EdgeKind::DocumentChunk,
2654            EdgeKind::ClusterMember,
2655        ]);
2656    }
2657    let mut out = Vec::new();
2658    for token in raw.split(',') {
2659        let token = token.trim();
2660        if token.is_empty() {
2661            continue;
2662        }
2663        let kind = match token {
2664            "triple" => EdgeKind::Triple,
2665            "document_chunk" => EdgeKind::DocumentChunk,
2666            "cluster_member" => EdgeKind::ClusterMember,
2667            "semantic" => {
2668                // semantic edges aren't precomputed; they're HNSW queries
2669                // at request time. Wrong endpoint.
2670                return Err(ApiError::bad_request(
2671                    "semantic edges are available via /v1/graph/neighbors/:id?kind=semantic, not /v1/graph/edges (semantic edges aren't precomputed; they're query-time HNSW lookups)",
2672                ));
2673            }
2674            other => {
2675                return Err(ApiError::bad_request(format!(
2676                    "unknown edge type {other:?}; expected one of triple/document_chunk/cluster_member"
2677                )));
2678            }
2679        };
2680        if !out.contains(&kind) {
2681            out.push(kind);
2682        }
2683    }
2684    if out.is_empty() {
2685        return Err(ApiError::bad_request(
2686            "type filter is empty after parsing; either omit or list at least one type",
2687        ));
2688    }
2689    Ok(out)
2690}
2691
/// Opaque cursor for `/v1/graph/nodes`. Encodes the last item's
/// `(ts_ms, id)` under sort `ts_ms DESC, id ASC`. A row belongs to a
/// later page when `ts_ms < cursor.ts_ms`, or when `ts_ms ==
/// cursor.ts_ms` and `id > cursor.id` (see `node_passes_cursor`). This
/// is not a plain tuple `<`: the two key parts sort in opposite
/// directions.
#[derive(Debug, Serialize, Deserialize)]
struct NodesCursor {
    /// Timestamp (ms) of the last item on the previous page.
    ts_ms: i64,
    /// Id of the last item on the previous page; breaks ties between
    /// items sharing `ts_ms`.
    id: String,
}
2700
/// Opaque cursor for `/v1/graph/edges`. Encodes the last item's
/// `(kind_idx, sub_id)` so the next page resumes at `> cursor` under
/// sort `(kind_idx ASC, sub_id ASC)`. `sub_id` is the per-kind stable
/// row id (triple_id for triples, chunk_id for document_chunk, the
/// composite `cluster_id||memory_id` string for cluster_member).
#[derive(Debug, Serialize, Deserialize)]
struct EdgesCursor {
    /// Kind ordering index of the last item (presumably the value of
    /// `EdgeKind::order_idx` — confirm against the edges handler).
    kind_idx: u8,
    /// Per-kind stable row id of the last item, as described above.
    sub_id: String,
}
2711
2712fn encode_cursor<T: Serialize>(value: &T) -> Result<String, ApiError> {
2713    use base64::Engine;
2714    let json = serde_json::to_vec(value).map_err(|e| {
2715        ApiError::internal(format!("cursor serialize: {e}"))
2716    })?;
2717    Ok(base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(json))
2718}
2719
2720fn decode_cursor<T: for<'de> Deserialize<'de>>(raw: &str) -> Result<T, ApiError> {
2721    use base64::Engine;
2722    let bytes = base64::engine::general_purpose::URL_SAFE_NO_PAD
2723        .decode(raw.as_bytes())
2724        .map_err(|e| ApiError::bad_request(format!("cursor: bad base64: {e}")))?;
2725    serde_json::from_slice::<T>(&bytes)
2726        .map_err(|e| ApiError::bad_request(format!("cursor: bad JSON payload: {e}")))
2727}
2728
/// Internal staging row for the nodes endpoint. Carries the GraphNode
/// plus the sort key so we can merge all kinds before applying the
/// pagination cut.
#[derive(Debug)]
struct StagingNode {
    /// The node as it will appear in the response.
    node: GraphNode,
    /// Timestamp half of the sort key (presumably compared through
    /// `cmp_node_sort_keys`, i.e. newest first).
    sort_ts_ms: i64,
    /// Id half of the sort key; the tie-break for equal timestamps.
    sort_id: String,
}
2738
/// Orders two `(ts_ms, id)` keys for the nodes listing: larger `ts_ms`
/// sorts first (newest first), and equal timestamps fall back to
/// ascending `id` so the order is deterministic.
fn cmp_node_sort_keys(a: (i64, &str), b: (i64, &str)) -> std::cmp::Ordering {
    let (a_ts, a_id) = a;
    let (b_ts, b_id) = b;
    // Operands are swapped on purpose to get descending timestamps.
    b_ts.cmp(&a_ts).then_with(|| a_id.cmp(b_id))
}
2748
2749/// True if `(ts_ms, id)` strictly comes AFTER `cursor` under the canonical
2750/// sort `ts_ms DESC, id ASC` — i.e. is admissible into a page following
2751/// the cursor.
2752fn node_passes_cursor(ts_ms: i64, id: &str, cursor: &NodesCursor) -> bool {
2753    cmp_node_sort_keys((ts_ms, id), (cursor.ts_ms, cursor.id.as_str()))
2754        == std::cmp::Ordering::Greater
2755}
2756
2757// --- Per-kind row fetchers (each runs a bounded query, applies the time
2758//     filter, returns rows already sorted `ts_ms DESC, id ASC`).
2759
/// Episode row as selected by `fetch_episodes_for_nodes`.
#[derive(Debug)]
struct NodeRowEp {
    /// `episodes.memory_id`.
    memory_id: String,
    /// `episodes.ts_ms`; the column the fetcher filters and sorts on.
    ts_ms: i64,
    /// `episodes.content`.
    content: String,
}
2766
2767fn fetch_episodes_for_nodes(
2768    conn: &rusqlite::Connection,
2769    since_ms: Option<i64>,
2770    until_ms: Option<i64>,
2771    cursor: Option<&NodesCursor>,
2772    limit: i64,
2773) -> rusqlite::Result<Vec<NodeRowEp>> {
2774    let mut sql = String::from(
2775        "SELECT memory_id, ts_ms, content
2776           FROM episodes
2777          WHERE status = 'active'",
2778    );
2779    let mut params: Vec<rusqlite::types::Value> = Vec::new();
2780    if let Some(s) = since_ms {
2781        sql.push_str(" AND ts_ms >= ?");
2782        params.push(s.into());
2783    }
2784    if let Some(u) = until_ms {
2785        sql.push_str(" AND ts_ms <= ?");
2786        params.push(u.into());
2787    }
2788    // Cursor pre-filter: under sort `ts_ms DESC, prefixed_id ASC`,
2789    // anything strictly newer than the cursor's ts_ms is in a previous
2790    // page; rows with equal ts_ms may or may not be (depends on the
2791    // cross-kind ordering). The post-merge step applies the full
2792    // `(ts_ms, prefixed_id)` comparison; here we just discard rows
2793    // that can't possibly survive.
2794    if let Some(cur) = cursor {
2795        sql.push_str(" AND ts_ms <= ?");
2796        params.push(cur.ts_ms.into());
2797    }
2798    sql.push_str(" ORDER BY ts_ms DESC, memory_id ASC LIMIT ?");
2799    params.push(limit.into());
2800    let mut stmt = conn.prepare(&sql)?;
2801    let rows: Vec<NodeRowEp> = stmt
2802        .query_map(rusqlite::params_from_iter(params), |r| {
2803            Ok(NodeRowEp {
2804                memory_id: r.get(0)?,
2805                ts_ms: r.get(1)?,
2806                content: r.get(2)?,
2807            })
2808        })?
2809        .collect::<rusqlite::Result<Vec<_>>>()?;
2810    Ok(rows)
2811}
2812
/// Document row as selected by `fetch_documents_for_nodes`.
#[derive(Debug)]
struct NodeRowDoc {
    /// `documents.doc_id`.
    doc_id: String,
    /// `documents.title`; nullable.
    title: Option<String>,
    /// `documents.source`; nullable.
    source: Option<String>,
    /// `documents.ingested_at_ms`; the column the fetcher filters and
    /// sorts on.
    ingested_at_ms: i64,
}
2820
2821fn fetch_documents_for_nodes(
2822    conn: &rusqlite::Connection,
2823    since_ms: Option<i64>,
2824    until_ms: Option<i64>,
2825    cursor: Option<&NodesCursor>,
2826    limit: i64,
2827) -> rusqlite::Result<Vec<NodeRowDoc>> {
2828    let mut sql = String::from(
2829        "SELECT doc_id, title, source, ingested_at_ms
2830           FROM documents
2831          WHERE status = 'active'",
2832    );
2833    let mut params: Vec<rusqlite::types::Value> = Vec::new();
2834    if let Some(s) = since_ms {
2835        sql.push_str(" AND ingested_at_ms >= ?");
2836        params.push(s.into());
2837    }
2838    if let Some(u) = until_ms {
2839        sql.push_str(" AND ingested_at_ms <= ?");
2840        params.push(u.into());
2841    }
2842    if let Some(cur) = cursor {
2843        sql.push_str(" AND ingested_at_ms <= ?");
2844        params.push(cur.ts_ms.into());
2845    }
2846    sql.push_str(" ORDER BY ingested_at_ms DESC, doc_id ASC LIMIT ?");
2847    params.push(limit.into());
2848    let mut stmt = conn.prepare(&sql)?;
2849    let rows: Vec<NodeRowDoc> = stmt
2850        .query_map(rusqlite::params_from_iter(params), |r| {
2851            Ok(NodeRowDoc {
2852                doc_id: r.get(0)?,
2853                title: r.get(1)?,
2854                source: r.get(2)?,
2855                ingested_at_ms: r.get(3)?,
2856            })
2857        })?
2858        .collect::<rusqlite::Result<Vec<_>>>()?;
2859    Ok(rows)
2860}
2861
/// One row of `document_chunks` as read by `fetch_chunks_for_nodes`.
#[derive(Debug)]
struct NodeRowChunk {
    /// `document_chunks.chunk_id`.
    chunk_id: String,
    /// `document_chunks.chunk_index`.
    chunk_index: i64,
    /// `document_chunks.content`.
    content: String,
    /// `document_chunks.created_at_ms`; the column the fetch filters and sorts on.
    created_at_ms: i64,
}
2869
2870fn fetch_chunks_for_nodes(
2871    conn: &rusqlite::Connection,
2872    since_ms: Option<i64>,
2873    until_ms: Option<i64>,
2874    cursor: Option<&NodesCursor>,
2875    limit: i64,
2876) -> rusqlite::Result<Vec<NodeRowChunk>> {
2877    // Filter by `document_chunks.created_at_ms`; chunks of forgotten
2878    // documents are filtered out by the join on `documents.status`.
2879    let mut sql = String::from(
2880        "SELECT c.chunk_id, c.chunk_index, c.content, c.created_at_ms
2881           FROM document_chunks c
2882           JOIN documents d ON d.doc_id = c.doc_id
2883          WHERE d.status = 'active'",
2884    );
2885    let mut params: Vec<rusqlite::types::Value> = Vec::new();
2886    if let Some(s) = since_ms {
2887        sql.push_str(" AND c.created_at_ms >= ?");
2888        params.push(s.into());
2889    }
2890    if let Some(u) = until_ms {
2891        sql.push_str(" AND c.created_at_ms <= ?");
2892        params.push(u.into());
2893    }
2894    if let Some(cur) = cursor {
2895        sql.push_str(" AND c.created_at_ms <= ?");
2896        params.push(cur.ts_ms.into());
2897    }
2898    sql.push_str(" ORDER BY c.created_at_ms DESC, c.chunk_id ASC LIMIT ?");
2899    params.push(limit.into());
2900    let mut stmt = conn.prepare(&sql)?;
2901    let rows: Vec<NodeRowChunk> = stmt
2902        .query_map(rusqlite::params_from_iter(params), |r| {
2903            Ok(NodeRowChunk {
2904                chunk_id: r.get(0)?,
2905                chunk_index: r.get(1)?,
2906                content: r.get(2)?,
2907                created_at_ms: r.get(3)?,
2908            })
2909        })?
2910        .collect::<rusqlite::Result<Vec<_>>>()?;
2911    Ok(rows)
2912}
2913
/// One cluster row as read by `fetch_clusters_for_nodes`.
#[derive(Debug)]
struct NodeRowCluster {
    /// `clusters.cluster_id`.
    cluster_id: String,
    /// `semantic_abstractions.content` from the LEFT JOIN; `None` when
    /// the join yields no (or a NULL) value.
    abstraction: Option<String>,
    /// `clusters.created_at_ms`; the column the fetch filters and sorts on.
    created_at_ms: i64,
}
2920
2921fn fetch_clusters_for_nodes(
2922    conn: &rusqlite::Connection,
2923    since_ms: Option<i64>,
2924    until_ms: Option<i64>,
2925    cursor: Option<&NodesCursor>,
2926    limit: i64,
2927) -> rusqlite::Result<Vec<NodeRowCluster>> {
2928    // clusters has no `status` column; LEFT JOIN abstractions for the
2929    // optional label.
2930    let mut sql = String::from(
2931        "SELECT c.cluster_id, sa.content, c.created_at_ms
2932           FROM clusters c
2933           LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
2934          WHERE 1=1",
2935    );
2936    let mut params: Vec<rusqlite::types::Value> = Vec::new();
2937    if let Some(s) = since_ms {
2938        sql.push_str(" AND c.created_at_ms >= ?");
2939        params.push(s.into());
2940    }
2941    if let Some(u) = until_ms {
2942        sql.push_str(" AND c.created_at_ms <= ?");
2943        params.push(u.into());
2944    }
2945    if let Some(cur) = cursor {
2946        sql.push_str(" AND c.created_at_ms <= ?");
2947        params.push(cur.ts_ms.into());
2948    }
2949    sql.push_str(" ORDER BY c.created_at_ms DESC, c.cluster_id ASC LIMIT ?");
2950    params.push(limit.into());
2951    let mut stmt = conn.prepare(&sql)?;
2952    let rows: Vec<NodeRowCluster> = stmt
2953        .query_map(rusqlite::params_from_iter(params), |r| {
2954            Ok(NodeRowCluster {
2955                cluster_id: r.get(0)?,
2956                abstraction: r.get(1)?,
2957                created_at_ms: r.get(2)?,
2958            })
2959        })?
2960        .collect::<rusqlite::Result<Vec<_>>>()?;
2961    Ok(rows)
2962}
2963
/// One synthesized entity as aggregated by `fetch_entities_for_nodes`.
#[derive(Debug)]
struct NodeRowEntity {
    /// The entity value: a `subject_id` or `object_id` from `triples`.
    value: String,
    /// `COUNT(*)` of subject/object references to `value` that passed
    /// the query's filters.
    ref_count: i64,
    /// `MIN(valid_from_ms)` over those same references.
    first_seen_ms: i64,
}
2970
2971/// Synthesize entity nodes from the triples table. Caps result at
2972/// `GRAPH_ENTITY_CAP`, ordered by `ref_count DESC` so the loudest
2973/// entities make the cut. Returns (rows, cap_reached).
2974///
2975/// **Cost**: this is O(N) over active triples per request. For tenants
2976/// with >100k triples this can be noticeable; v0.10.x can cache the
2977/// rollup if profiling justifies it. The 200-row cap keeps the wire
2978/// payload bounded regardless.
2979fn fetch_entities_for_nodes(
2980    conn: &rusqlite::Connection,
2981    since_ms: Option<i64>,
2982    until_ms: Option<i64>,
2983    cursor: Option<&NodesCursor>,
2984) -> rusqlite::Result<(Vec<NodeRowEntity>, bool)> {
2985    // Pull subject + object columns, group by value, compute count + min
2986    // ts_ms. UNION ALL the two columns into a single aggregation. Apply
2987    // time filter against `valid_from_ms` (the closest analogue to "when
2988    // was this entity first referenced").
2989    let mut sql = String::from(
2990        "WITH all_refs AS (
2991            SELECT subject_id AS value, valid_from_ms AS ts_ms FROM triples WHERE status = 'active'
2992            UNION ALL
2993            SELECT object_id  AS value, valid_from_ms AS ts_ms FROM triples WHERE status = 'active'
2994         )
2995         SELECT value, COUNT(*) AS ref_count, MIN(ts_ms) AS first_seen_ms
2996           FROM all_refs
2997          WHERE 1=1",
2998    );
2999    let mut params: Vec<rusqlite::types::Value> = Vec::new();
3000    if let Some(s) = since_ms {
3001        sql.push_str(" AND ts_ms >= ?");
3002        params.push(s.into());
3003    }
3004    if let Some(u) = until_ms {
3005        sql.push_str(" AND ts_ms <= ?");
3006        params.push(u.into());
3007    }
3008    // Cursor: drop entities whose first_seen_ms strictly newer than the
3009    // cursor. We can't predicate on COUNT() until after GROUP BY, so the
3010    // cap-applicable filter sits in the HAVING clause.
3011    sql.push_str(" GROUP BY value");
3012    if let Some(ts) = cursor.map(|c| c.ts_ms) {
3013        sql.push_str(" HAVING MIN(ts_ms) <= ?");
3014        params.push(ts.into());
3015    }
3016    // Over-fetch by one to detect "cap reached".
3017    let want = GRAPH_ENTITY_CAP as i64 + 1;
3018    sql.push_str(" ORDER BY ref_count DESC, value ASC LIMIT ?");
3019    params.push(want.into());
3020    let mut stmt = conn.prepare(&sql)?;
3021    let rows: Vec<NodeRowEntity> = stmt
3022        .query_map(rusqlite::params_from_iter(params), |r| {
3023            Ok(NodeRowEntity {
3024                value: r.get(0)?,
3025                ref_count: r.get(1)?,
3026                first_seen_ms: r.get(2)?,
3027            })
3028        })?
3029        .collect::<rusqlite::Result<Vec<_>>>()?;
3030    let cap_reached = rows.len() > GRAPH_ENTITY_CAP;
3031    let mut trimmed = rows;
3032    if cap_reached {
3033        trimmed.truncate(GRAPH_ENTITY_CAP);
3034    }
3035    Ok((trimmed, cap_reached))
3036}
3037
3038/// `GET /v1/graph/nodes`. Paginated node catalog across the tenant.
3039/// See module-level comments for the contract.
3040async fn graph_nodes_handler(
3041    TenantExtractor(tenant): TenantExtractor,
3042    Query(q): Query<GraphNodesQuery>,
3043) -> Result<Response, ApiError> {
3044    let limit = q.limit.unwrap_or(GRAPH_NODES_DEFAULT_LIMIT);
3045    let limit = limit.clamp(1, GRAPH_NODES_MAX_LIMIT);
3046    let kinds = parse_node_kind_filter(q.kind.as_deref())?;
3047    let since_ms = q.since_ms;
3048    let until_ms = q.until_ms;
3049    if let (Some(s), Some(u)) = (since_ms, until_ms) {
3050        if s > u {
3051            return Err(ApiError::bad_request(format!(
3052                "since_ms ({s}) must be <= until_ms ({u})"
3053            )));
3054        }
3055    }
3056    let cursor = match q.cursor.as_deref() {
3057        None => None,
3058        Some("") => None,
3059        Some(raw) => Some(decode_cursor::<NodesCursor>(raw)?),
3060    };
3061    let want_episode = kinds.contains(&NodeKind::Episode);
3062    let want_document = kinds.contains(&NodeKind::Document);
3063    let want_chunk = kinds.contains(&NodeKind::Chunk);
3064    let want_cluster = kinds.contains(&NodeKind::Cluster);
3065    let want_entity = kinds.contains(&NodeKind::Entity);
3066
3067    // Over-fetch `limit + 2` per kind:
3068    //   * `+1` so the merge step can detect "more rows available beyond
3069    //     this page" → emits a `next_cursor` instead of None.
3070    //   * `+1` again because the SQL pre-filter `ts_ms <= cursor.ts_ms`
3071    //     can pull the previous page's last item back in; the post-merge
3072    //     cursor predicate drops it, costing one row of headroom.
3073    // The entity cap stays at GRAPH_ENTITY_CAP — entities are bounded
3074    // independently by the response cap, not the page limit.
3075    let per_kind_limit = (limit as i64).saturating_add(2);
3076    let tenant_id_for_blocking = tenant.tenant_id().to_string();
3077    let cursor_clone = cursor.as_ref().map(|c| NodesCursor {
3078        ts_ms: c.ts_ms,
3079        id: c.id.clone(),
3080    });
3081
3082    let (mut staged, cap_reached) = tenant
3083        .read()
3084        .interact(move |conn| {
3085            let mut staged: Vec<StagingNode> = Vec::new();
3086            let mut cap_reached = false;
3087            let cursor_ref = cursor_clone.as_ref();
3088
3089            if want_episode {
3090                let eps = fetch_episodes_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3091                for ep in eps {
3092                    let id = format!("ep:{}", ep.memory_id);
3093                    let exp = ExpandedEpisode {
3094                        memory_id: ep.memory_id,
3095                        ts_ms: ep.ts_ms,
3096                        content: ep.content,
3097                    };
3098                    let node = graph_node_for_episode(&tenant_id_for_blocking, &exp);
3099                    staged.push(StagingNode {
3100                        sort_ts_ms: ep.ts_ms,
3101                        sort_id: id.clone(),
3102                        node,
3103                    });
3104                }
3105            }
3106            if want_document {
3107                let docs = fetch_documents_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3108                for d in docs {
3109                    let id = format!("doc:{}", d.doc_id);
3110                    let exp = ExpandedDocument {
3111                        doc_id: d.doc_id,
3112                        title: d.title,
3113                        source: d.source,
3114                        ingested_at_ms: d.ingested_at_ms,
3115                    };
3116                    let node = graph_node_for_document(&tenant_id_for_blocking, &exp);
3117                    staged.push(StagingNode {
3118                        sort_ts_ms: d.ingested_at_ms,
3119                        sort_id: id.clone(),
3120                        node,
3121                    });
3122                }
3123            }
3124            if want_chunk {
3125                let chunks = fetch_chunks_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3126                for c in chunks {
3127                    let id = format!("chunk:{}", c.chunk_id);
3128                    let exp = ExpandedChunk {
3129                        chunk_id: c.chunk_id,
3130                        chunk_index: c.chunk_index,
3131                        content: c.content,
3132                    };
3133                    // graph_node_for_chunk sets ts_ms = None for the
3134                    // wire format (chunks don't have a natural user-
3135                    // facing timestamp); but for sorting we use the
3136                    // row's created_at_ms.
3137                    let mut node = graph_node_for_chunk(&tenant_id_for_blocking, &exp);
3138                    node.ts_ms = Some(c.created_at_ms);
3139                    staged.push(StagingNode {
3140                        sort_ts_ms: c.created_at_ms,
3141                        sort_id: id.clone(),
3142                        node,
3143                    });
3144                }
3145            }
3146            if want_cluster {
3147                let cls = fetch_clusters_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
3148                for c in cls {
3149                    let id = format!("cl:{}", c.cluster_id);
3150                    let node = graph_node_for_cluster(
3151                        &tenant_id_for_blocking,
3152                        &c.cluster_id,
3153                        c.abstraction.as_deref(),
3154                        c.created_at_ms,
3155                    );
3156                    staged.push(StagingNode {
3157                        sort_ts_ms: c.created_at_ms,
3158                        sort_id: id.clone(),
3159                        node,
3160                    });
3161                }
3162            }
3163            if want_entity {
3164                let (ents, was_cap_reached) =
3165                    fetch_entities_for_nodes(conn, since_ms, until_ms, cursor_ref)?;
3166                cap_reached = was_cap_reached;
3167                for e in ents {
3168                    let id = format!("ent:{}", e.value);
3169                    let mut node = graph_node_for_entity(&tenant_id_for_blocking, &e.value);
3170                    node.ts_ms = Some(e.first_seen_ms);
3171                    node.preview =
3172                        Some(format!("Referenced in {} triples", e.ref_count));
3173                    staged.push(StagingNode {
3174                        sort_ts_ms: e.first_seen_ms,
3175                        sort_id: id.clone(),
3176                        node,
3177                    });
3178                }
3179            }
3180            Ok::<_, rusqlite::Error>((staged, cap_reached))
3181        })
3182        .await
3183        .map_err(ApiError::from)?;
3184
3185    // Apply cursor filter.
3186    if let Some(cur) = &cursor {
3187        staged.retain(|s| node_passes_cursor(s.sort_ts_ms, &s.sort_id, cur));
3188    }
3189
3190    // Sort `ts_ms DESC, id ASC`.
3191    staged.sort_by(|a, b| {
3192        cmp_node_sort_keys((a.sort_ts_ms, &a.sort_id), (b.sort_ts_ms, &b.sort_id))
3193    });
3194
3195    // Apply page limit + compute next_cursor.
3196    let limit_us = limit as usize;
3197    let next_cursor = if staged.len() > limit_us {
3198        let last = &staged[limit_us - 1];
3199        Some(NodesCursor {
3200            ts_ms: last.sort_ts_ms,
3201            id: last.sort_id.clone(),
3202        })
3203    } else {
3204        None
3205    };
3206    staged.truncate(limit_us);
3207
3208    let next_cursor_str = match next_cursor {
3209        Some(c) => Some(encode_cursor(&c)?),
3210        None => None,
3211    };
3212
3213    let nodes: Vec<GraphNode> = staged.into_iter().map(|s| s.node).collect();
3214    let payload = GraphNodesResponse {
3215        nodes,
3216        next_cursor: next_cursor_str,
3217    };
3218
3219    // Attach the entity-cap header so clients can show truncation UX
3220    // without parsing the body.
3221    let mut response = Json(payload).into_response();
3222    if cap_reached {
3223        response
3224            .headers_mut()
3225            .insert(ENTITY_CAP_HEADER, HeaderValue::from_static("true"));
3226    }
3227    Ok(response)
3228}
3229
3230// --- /v1/graph/edges --------------------------------------------------
3231
/// An edge paired with the key used to order and paginate it in
/// `graph_edges_handler`.
#[derive(Debug)]
struct StagingEdge {
    /// The wire-format edge that ends up in the response.
    edge: GraphEdge,
    /// Primary sort key: the edge kind's `EdgeKind::order_idx()`.
    kind_idx: u8,
    /// Secondary sort key within a kind: the triple id, the chunk id, or
    /// `cluster_id` + U+001F + `memory_id` for cluster-member edges.
    sub_id: String,
}
3238
/// Order two edge sort keys: by kind index first, then by sub-id as a
/// bytewise string comparison when the kind indexes are equal.
fn cmp_edge_sort_keys(a: (u8, &str), b: (u8, &str)) -> std::cmp::Ordering {
    let (a_kind, a_sub) = a;
    let (b_kind, b_sub) = b;
    a_kind.cmp(&b_kind).then_with(|| a_sub.cmp(b_sub))
}
3245
3246fn edge_passes_cursor(kind_idx: u8, sub_id: &str, cursor: &EdgesCursor) -> bool {
3247    cmp_edge_sort_keys((kind_idx, sub_id), (cursor.kind_idx, cursor.sub_id.as_str()))
3248        == std::cmp::Ordering::Greater
3249}
3250
3251/// Whether the supplied focus `node_id` (kind, value) matches an edge's
3252/// (source, target) endpoint pair under a given edge kind. Used to
3253/// filter `?node_id=...` queries.
3254fn edge_touches_focus(
3255    kind: EdgeKind,
3256    focus_kind: NodeKind,
3257    focus_value: &str,
3258    src_value: &str,
3259    tgt_value: &str,
3260    extra_value: Option<&str>,
3261) -> bool {
3262    // Determine which endpoint kinds this edge family produces; if the
3263    // focus kind isn't compatible, no match.
3264    match kind {
3265        EdgeKind::Triple => match focus_kind {
3266            // Triple edges flow source_episode → ent:<object_id>. We
3267            // also expose subject/object entities as endpoints (see
3268            // emit_triple_edges_for_focus); the matching here covers
3269            // episode focus + entity focus + the symmetric pair.
3270            NodeKind::Episode => src_value == focus_value,
3271            NodeKind::Entity => {
3272                tgt_value == focus_value
3273                    || extra_value.map(|x| x == focus_value).unwrap_or(false)
3274                    || src_value == focus_value
3275            }
3276            _ => false,
3277        },
3278        EdgeKind::DocumentChunk => match focus_kind {
3279            NodeKind::Document => src_value == focus_value,
3280            NodeKind::Chunk => tgt_value == focus_value,
3281            _ => false,
3282        },
3283        EdgeKind::ClusterMember => match focus_kind {
3284            NodeKind::Cluster => src_value == focus_value,
3285            NodeKind::Episode => tgt_value == focus_value,
3286            _ => false,
3287        },
3288    }
3289}
3290
/// One triple row as read by `fetch_triple_edges`.
#[derive(Debug)]
struct EdgeRowTriple {
    /// `triples.triple_id`.
    triple_id: String,
    /// `episodes.memory_id` of the episode joined through
    /// `triples.source_episode_id`, if the query yielded one.
    source_memory_id: Option<String>,
    /// `triples.object_id`.
    object_id: String,
    /// `triples.predicate`.
    predicate: String,
    /// `triples.confidence`.
    confidence: f32,
}
3299
3300fn fetch_triple_edges(conn: &rusqlite::Connection) -> rusqlite::Result<Vec<EdgeRowTriple>> {
3301    // Emit one edge per triple: source_episode → ent:object_id. Skip
3302    // orphan triples (`source_episode_id IS NULL`). Bound the scan at
3303    // GRAPH_EDGES_MAX_LIMIT * a safety multiplier so a runaway tenant
3304    // doesn't OOM the page-builder; the merge-and-page step trims to
3305    // the real limit downstream.
3306    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
3307    let mut stmt = conn.prepare(
3308        "SELECT t.triple_id, e.memory_id, t.object_id, t.predicate, t.confidence
3309           FROM triples t
3310           LEFT JOIN episodes e ON e.rowid = t.source_episode_id
3311          WHERE t.status = 'active'
3312          ORDER BY t.triple_id ASC
3313          LIMIT ?1",
3314    )?;
3315    let rows: Vec<EdgeRowTriple> = stmt
3316        .query_map(rusqlite::params![safety_cap], |r| {
3317            Ok(EdgeRowTriple {
3318                triple_id: r.get(0)?,
3319                source_memory_id: r.get::<_, Option<String>>(1)?,
3320                object_id: r.get(2)?,
3321                predicate: r.get(3)?,
3322                confidence: r.get(4)?,
3323            })
3324        })?
3325        .collect::<rusqlite::Result<Vec<_>>>()?;
3326    Ok(rows)
3327}
3328
/// One document→chunk pair as read by `fetch_document_chunk_edges`.
#[derive(Debug)]
struct EdgeRowDocChunk {
    /// `document_chunks.chunk_id` (the edge's target).
    chunk_id: String,
    /// `document_chunks.doc_id` (the edge's source).
    doc_id: String,
}
3334
3335fn fetch_document_chunk_edges(
3336    conn: &rusqlite::Connection,
3337) -> rusqlite::Result<Vec<EdgeRowDocChunk>> {
3338    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
3339    let mut stmt = conn.prepare(
3340        "SELECT c.chunk_id, c.doc_id
3341           FROM document_chunks c
3342           JOIN documents d ON d.doc_id = c.doc_id
3343          WHERE d.status = 'active'
3344          ORDER BY c.chunk_id ASC
3345          LIMIT ?1",
3346    )?;
3347    let rows: Vec<EdgeRowDocChunk> = stmt
3348        .query_map(rusqlite::params![safety_cap], |r| {
3349            Ok(EdgeRowDocChunk {
3350                chunk_id: r.get(0)?,
3351                doc_id: r.get(1)?,
3352            })
3353        })?
3354        .collect::<rusqlite::Result<Vec<_>>>()?;
3355    Ok(rows)
3356}
3357
/// One cluster→episode membership as read by
/// `fetch_cluster_member_edges`.
#[derive(Debug)]
struct EdgeRowClusterMember {
    /// `cluster_episodes.cluster_id` (the edge's source).
    cluster_id: String,
    /// `cluster_episodes.memory_id` (the edge's target).
    memory_id: String,
}
3363
3364fn fetch_cluster_member_edges(
3365    conn: &rusqlite::Connection,
3366) -> rusqlite::Result<Vec<EdgeRowClusterMember>> {
3367    let safety_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
3368    let mut stmt = conn.prepare(
3369        "SELECT ce.cluster_id, ce.memory_id
3370           FROM cluster_episodes ce
3371           JOIN episodes e ON e.memory_id = ce.memory_id
3372          WHERE e.status = 'active'
3373          ORDER BY ce.cluster_id ASC, ce.memory_id ASC
3374          LIMIT ?1",
3375    )?;
3376    let rows: Vec<EdgeRowClusterMember> = stmt
3377        .query_map(rusqlite::params![safety_cap], |r| {
3378            Ok(EdgeRowClusterMember {
3379                cluster_id: r.get(0)?,
3380                memory_id: r.get(1)?,
3381            })
3382        })?
3383        .collect::<rusqlite::Result<Vec<_>>>()?;
3384    Ok(rows)
3385}
3386
3387/// `GET /v1/graph/edges`. Paginated edge catalog. See module-level
3388/// comments for the contract.
3389async fn graph_edges_handler(
3390    TenantExtractor(tenant): TenantExtractor,
3391    Query(q): Query<GraphEdgesQuery>,
3392) -> Result<Json<GraphEdgesResponse>, ApiError> {
3393    let limit = q.limit.unwrap_or(GRAPH_EDGES_DEFAULT_LIMIT);
3394    let limit = limit.clamp(1, GRAPH_EDGES_MAX_LIMIT);
3395    let kinds = parse_edge_kind_filter(q.r#type.as_deref())?;
3396    let cursor = match q.cursor.as_deref() {
3397        None => None,
3398        Some("") => None,
3399        Some(raw) => Some(decode_cursor::<EdgesCursor>(raw)?),
3400    };
3401
3402    let focus = match q.node_id.as_deref() {
3403        None => None,
3404        Some(raw) => {
3405            let (kind, value) = parse_node_id(raw)?;
3406            Some((kind, value.to_string()))
3407        }
3408    };
3409
3410    let want_triple = kinds.contains(&EdgeKind::Triple);
3411    let want_doc_chunk = kinds.contains(&EdgeKind::DocumentChunk);
3412    let want_cluster_member = kinds.contains(&EdgeKind::ClusterMember);
3413
3414    let staged: Vec<StagingEdge> = tenant
3415        .read()
3416        .interact(move |conn| {
3417            let mut staged: Vec<StagingEdge> = Vec::new();
3418
3419            if want_triple {
3420                for t in fetch_triple_edges(conn)? {
3421                    let src_id = match &t.source_memory_id {
3422                        Some(mid) => format!("ep:{mid}"),
3423                        None => continue, // orphan triple — skip
3424                    };
3425                    let tgt_id = format!("ent:{}", t.object_id);
3426                    if let Some((fk, fv)) = &focus {
3427                        // `src_value` for matching is the bare memory_id
3428                        // (after the `ep:` prefix); `tgt_value` is the
3429                        // bare entity value.
3430                        if !edge_touches_focus(
3431                            EdgeKind::Triple,
3432                            *fk,
3433                            fv,
3434                            t.source_memory_id
3435                                .as_deref()
3436                                .unwrap_or(""),
3437                            &t.object_id,
3438                            // Triples carry a subject_id too, but the
3439                            // emitted edge only goes ep → ent(object).
3440                            // For entity-focus matches we also accept
3441                            // hits on subject_id; surface it through
3442                            // the `extra` slot.
3443                            None,
3444                        ) {
3445                            continue;
3446                        }
3447                    }
3448                    let edge = GraphEdge {
3449                        id: edge_id(&src_id, "triple", &tgt_id),
3450                        source: src_id,
3451                        target: tgt_id,
3452                        kind: "triple",
3453                        predicate: Some(t.predicate),
3454                        weight: Some(t.confidence),
3455                    };
3456                    staged.push(StagingEdge {
3457                        edge,
3458                        kind_idx: EdgeKind::Triple.order_idx(),
3459                        sub_id: t.triple_id,
3460                    });
3461                }
3462            }
3463            if want_doc_chunk {
3464                for dc in fetch_document_chunk_edges(conn)? {
3465                    let src_id = format!("doc:{}", dc.doc_id);
3466                    let tgt_id = format!("chunk:{}", dc.chunk_id);
3467                    if let Some((fk, fv)) = &focus {
3468                        if !edge_touches_focus(
3469                            EdgeKind::DocumentChunk,
3470                            *fk,
3471                            fv,
3472                            &dc.doc_id,
3473                            &dc.chunk_id,
3474                            None,
3475                        ) {
3476                            continue;
3477                        }
3478                    }
3479                    let edge = GraphEdge {
3480                        id: edge_id(&src_id, "document_chunk", &tgt_id),
3481                        source: src_id,
3482                        target: tgt_id,
3483                        kind: "document_chunk",
3484                        predicate: None,
3485                        weight: None,
3486                    };
3487                    staged.push(StagingEdge {
3488                        edge,
3489                        kind_idx: EdgeKind::DocumentChunk.order_idx(),
3490                        sub_id: dc.chunk_id,
3491                    });
3492                }
3493            }
3494            if want_cluster_member {
3495                for cm in fetch_cluster_member_edges(conn)? {
3496                    let src_id = format!("cl:{}", cm.cluster_id);
3497                    let tgt_id = format!("ep:{}", cm.memory_id);
3498                    if let Some((fk, fv)) = &focus {
3499                        if !edge_touches_focus(
3500                            EdgeKind::ClusterMember,
3501                            *fk,
3502                            fv,
3503                            &cm.cluster_id,
3504                            &cm.memory_id,
3505                            None,
3506                        ) {
3507                            continue;
3508                        }
3509                    }
3510                    let edge = GraphEdge {
3511                        id: edge_id(&src_id, "cluster_member", &tgt_id),
3512                        source: src_id,
3513                        target: tgt_id,
3514                        kind: "cluster_member",
3515                        predicate: None,
3516                        weight: None,
3517                    };
3518                    let sub_id = format!("{}\u{1f}{}", cm.cluster_id, cm.memory_id);
3519                    staged.push(StagingEdge {
3520                        edge,
3521                        kind_idx: EdgeKind::ClusterMember.order_idx(),
3522                        sub_id,
3523                    });
3524                }
3525            }
3526            Ok::<_, rusqlite::Error>(staged)
3527        })
3528        .await
3529        .map_err(ApiError::from)?;
3530
3531    // Apply cursor filter.
3532    let mut staged = staged;
3533    if let Some(cur) = &cursor {
3534        staged.retain(|s| edge_passes_cursor(s.kind_idx, &s.sub_id, cur));
3535    }
3536
3537    // Sort `(kind_idx ASC, sub_id ASC)` — stable, simple.
3538    staged.sort_by(|a, b| {
3539        cmp_edge_sort_keys((a.kind_idx, &a.sub_id), (b.kind_idx, &b.sub_id))
3540    });
3541
3542    let limit_us = limit as usize;
3543    let next_cursor = if staged.len() > limit_us {
3544        let last = &staged[limit_us - 1];
3545        Some(EdgesCursor {
3546            kind_idx: last.kind_idx,
3547            sub_id: last.sub_id.clone(),
3548        })
3549    } else {
3550        None
3551    };
3552    staged.truncate(limit_us);
3553    let next_cursor_str = match next_cursor {
3554        Some(c) => Some(encode_cursor(&c)?),
3555        None => None,
3556    };
3557
3558    let edges: Vec<GraphEdge> = staged.into_iter().map(|s| s.edge).collect();
3559    Ok(Json(GraphEdgesResponse {
3560        edges,
3561        next_cursor: next_cursor_str,
3562    }))
3563}
3564
3565// ---------------------------------------------------------------------------
3566// Graph inspect — kind-discriminated full-record drill (v0.10.0)
3567//
3568// `GET /v1/graph/inspect/{id}` powers solo-web's right-side inspector
3569// panel. Path `id` carries the prefixed node identifier (ep:/doc:/chunk:/
3570// cl:/ent:); the handler dispatches per-kind and returns the same wire
3571// shape solo-web's `InspectResponse` expects: `{ node, full_text?,
3572// triples_in[], triples_out[] }`.
3573//
3574// Per-kind contract (v0.10.0 P1):
3575//   * `ep:<memory_id>`     full_text = episodes.content (untruncated),
3576//                          triples_in = [],
3577//                          triples_out = triples WHERE source_episode_id = rowid
3578//                          (one edge per triple, ep -> ent(object), predicate
3579//                          + weight surfaced). Episodes never appear as triple
3580//                          subjects/objects, so triples_in is structurally
3581//                          empty.
3582//   * `doc:<doc_id>`       full_text = concatenated chunk bodies separated by
3583//                          "\n\n" (no `documents.full_text` column exists; the
3584//                          chunks-concat path produces the same final text the
3585//                          ingester chunked from). triples_in/out = [] --
3586//                          documents don't directly carry triples; their
3587//                          chunks transitively do, but the inspector reaches
3588//                          those via the existing `/v1/graph/expand` drill.
3589//   * `chunk:<chunk_id>`   full_text = document_chunks.content,
3590//                          triples_in/out = [] (chunks aren't triple endpoints).
3591//   * `cl:<cluster_id>`    full_text = label + "\n\n" + abstraction
3592//                          (`semantic_abstractions.content`) when an
3593//                          abstraction exists; just the label otherwise.
3594//                          triples_in/out = [].
3595//   * `ent:<value>`        full_text = None (entities have no body),
3596//                          triples_in = [],
3597//                          triples_out = all triples where the entity appears
3598//                          as subject OR object. Capped at
3599//                          `GRAPH_INSPECT_ENTITY_TRIPLES_CAP` (50). Entities
3600//                          are synthetic -- an `ent:<value>` with zero triples
3601//                          in the tenant returns 404 (the entity exists only
3602//                          if at least one triple references it).
3603//
3604// Error semantics: 404 if the prefixed id has no row in the tenant's DB.
3605// 400 if the prefix is unknown or the body after `:` is empty (reuses
3606// `parse_node_id`). Tenant + auth are handled by the existing extractors.
3607//
3608// Lesson #30: no audit emit. Inspect is a derived read over already-
3609// audited primitives.
3610// ---------------------------------------------------------------------------
3611
/// Upper bound on the number of triple rows an entity inspect fetches.
///
/// Bound as the `LIMIT` parameter of the entity triples query, hence the
/// `i64` type. Popular entities ("user", "Alice") can be referenced by a
/// very large number of triples, while the inspector panel only needs a
/// sample for orientation; `/v1/graph/expand?kind=triple` serves the full
/// paginated set when the UI needs more.
const GRAPH_INSPECT_ENTITY_TRIPLES_CAP: i64 = 50;
3617
3618#[derive(Debug, Serialize)]
3619struct GraphInspectResponse {
3620    node: GraphNode,
3621    #[serde(skip_serializing_if = "Option::is_none")]
3622    full_text: Option<String>,
3623    triples_in: Vec<GraphEdge>,
3624    triples_out: Vec<GraphEdge>,
3625}
3626
3627/// `GET /v1/graph/inspect/{id}`. See module-level comments.
3628async fn graph_inspect_handler(
3629    TenantExtractor(tenant): TenantExtractor,
3630    Path(id): Path<String>,
3631) -> Result<Json<GraphInspectResponse>, ApiError> {
3632    let (kind, value) = parse_node_id(&id)?;
3633    let tenant_id_str = tenant.tenant_id().to_string();
3634    let value = value.to_string();
3635    let node_id_full = id;
3636    match kind {
3637        NodeKind::Episode => {
3638            inspect_episode_node(&tenant, &tenant_id_str, value, node_id_full).await
3639        }
3640        NodeKind::Document => {
3641            inspect_document_node(&tenant, &tenant_id_str, value, node_id_full).await
3642        }
3643        NodeKind::Chunk => {
3644            inspect_chunk_node(&tenant, &tenant_id_str, value, node_id_full).await
3645        }
3646        NodeKind::Cluster => {
3647            inspect_cluster_node(&tenant, &tenant_id_str, value, node_id_full).await
3648        }
3649        NodeKind::Entity => {
3650            inspect_entity_node(&tenant, &tenant_id_str, value, node_id_full).await
3651        }
3652    }
3653    .map(Json)
3654}
3655
3656// ---- per-kind paths ----
3657
3658async fn inspect_episode_node(
3659    tenant: &TenantHandle,
3660    tenant_id: &str,
3661    memory_id: String,
3662    node_id_full: String,
3663) -> Result<GraphInspectResponse, ApiError> {
3664    let memory_id_for_err = memory_id.clone();
3665    let memory_id_q = memory_id.clone();
3666    // Fetch the episode row + all triples sourced from it in one
3667    // interact() call to keep the connection check-out short.
3668    let fetched: Option<(ExpandedEpisode, Vec<TripleRow>)> = tenant
3669        .read()
3670        .interact(move |conn| {
3671            let ep_row: Option<(i64, i64, String)> = conn
3672                .query_row(
3673                    "SELECT rowid, ts_ms, content
3674                       FROM episodes
3675                      WHERE memory_id = ?1
3676                        AND status = 'active'",
3677                    rusqlite::params![&memory_id_q],
3678                    |r| {
3679                        Ok((
3680                            r.get::<_, i64>(0)?,
3681                            r.get::<_, i64>(1)?,
3682                            r.get::<_, String>(2)?,
3683                        ))
3684                    },
3685                )
3686                .map(Some)
3687                .or_else(|e| match e {
3688                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
3689                    other => Err(other),
3690                })?;
3691            let Some((rowid, ts_ms, content)) = ep_row else {
3692                return Ok(None);
3693            };
3694            let mut stmt = conn.prepare(
3695                "SELECT subject_id, predicate, object_id, confidence
3696                   FROM triples
3697                  WHERE source_episode_id = ?1
3698                    AND status = 'active'
3699                  ORDER BY valid_from_ms DESC",
3700            )?;
3701            let triples = stmt
3702                .query_map(rusqlite::params![rowid], |r| {
3703                    Ok(TripleRow {
3704                        subject_id: r.get(0)?,
3705                        predicate: r.get(1)?,
3706                        object_id: r.get(2)?,
3707                        confidence: r.get(3)?,
3708                    })
3709                })?
3710                .collect::<rusqlite::Result<Vec<_>>>()?;
3711            let ep = ExpandedEpisode {
3712                memory_id: memory_id_q,
3713                ts_ms,
3714                content,
3715            };
3716            Ok::<_, rusqlite::Error>(Some((ep, triples)))
3717        })
3718        .await
3719        .map_err(ApiError::from)?;
3720
3721    let (ep, triples) = fetched.ok_or_else(|| {
3722        ApiError::not_found(format!(
3723            "node_id {node_id_full:?} (memory_id {memory_id_for_err}) not found in current tenant"
3724        ))
3725    })?;
3726
3727    let node = graph_node_for_episode(tenant_id, &ep);
3728    let full_text = Some(ep.content.clone());
3729    // Triples flow from this episode (the source) to entity endpoints.
3730    // Emit one edge per triple: ep -> ent(object), predicate from the
3731    // triple, weight = confidence. This mirrors the `/v1/graph/edges`
3732    // triple-edge convention so the renderer can dedupe via composite id.
3733    let mut triples_out = Vec::with_capacity(triples.len());
3734    for t in triples {
3735        let tgt_id = format!("ent:{}", t.object_id);
3736        triples_out.push(GraphEdge {
3737            id: edge_id(&node_id_full, "triple", &tgt_id),
3738            source: node_id_full.clone(),
3739            target: tgt_id,
3740            kind: "triple",
3741            predicate: Some(t.predicate),
3742            weight: Some(t.confidence),
3743        });
3744    }
3745    Ok(GraphInspectResponse {
3746        node,
3747        full_text,
3748        triples_in: Vec::new(),
3749        triples_out,
3750    })
3751}
3752
3753async fn inspect_document_node(
3754    tenant: &TenantHandle,
3755    tenant_id: &str,
3756    doc_id: String,
3757    node_id_full: String,
3758) -> Result<GraphInspectResponse, ApiError> {
3759    let doc_id_for_err = doc_id.clone();
3760    let doc_id_q = doc_id.clone();
3761    // Fetch the document row + all chunk bodies (ORDER BY chunk_index) in
3762    // one interact() call. The chunks-concat path is the source of full_text
3763    // since the `documents` table doesn't carry the original raw text. For
3764    // v0.10.0 P1 we concatenate every chunk; pagination is the inspector
3765    // panel's responsibility if the document is very large.
3766    let fetched: Option<(ExpandedDocument, Vec<String>)> = tenant
3767        .read()
3768        .interact(move |conn| {
3769            let doc_row: Option<ExpandedDocument> = conn
3770                .query_row(
3771                    "SELECT doc_id, title, source, ingested_at_ms
3772                       FROM documents
3773                      WHERE doc_id = ?1
3774                        AND status = 'active'",
3775                    rusqlite::params![&doc_id_q],
3776                    |r| {
3777                        Ok(ExpandedDocument {
3778                            doc_id: r.get(0)?,
3779                            title: r.get(1)?,
3780                            source: r.get(2)?,
3781                            ingested_at_ms: r.get(3)?,
3782                        })
3783                    },
3784                )
3785                .map(Some)
3786                .or_else(|e| match e {
3787                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
3788                    other => Err(other),
3789                })?;
3790            let Some(doc) = doc_row else {
3791                return Ok(None);
3792            };
3793            let mut stmt = conn.prepare(
3794                "SELECT content
3795                   FROM document_chunks
3796                  WHERE doc_id = ?1
3797                  ORDER BY chunk_index ASC",
3798            )?;
3799            let chunks = stmt
3800                .query_map(rusqlite::params![&doc_id_q], |r| r.get::<_, String>(0))?
3801                .collect::<rusqlite::Result<Vec<_>>>()?;
3802            Ok::<_, rusqlite::Error>(Some((doc, chunks)))
3803        })
3804        .await
3805        .map_err(ApiError::from)?;
3806
3807    let (doc, chunks) = fetched.ok_or_else(|| {
3808        ApiError::not_found(format!(
3809            "node_id {node_id_full:?} (doc_id {doc_id_for_err}) not found in current tenant"
3810        ))
3811    })?;
3812
3813    let full_text = if chunks.is_empty() {
3814        // Document with zero chunks (e.g. mid-ingest, or an empty source).
3815        // Return None to signal "no body available" rather than an empty
3816        // string -- saves the renderer a degenerate code path.
3817        None
3818    } else {
3819        Some(chunks.join("\n\n"))
3820    };
3821
3822    Ok(GraphInspectResponse {
3823        node: graph_node_for_document(tenant_id, &doc),
3824        full_text,
3825        triples_in: Vec::new(),
3826        triples_out: Vec::new(),
3827    })
3828}
3829
3830async fn inspect_chunk_node(
3831    tenant: &TenantHandle,
3832    tenant_id: &str,
3833    chunk_id: String,
3834    node_id_full: String,
3835) -> Result<GraphInspectResponse, ApiError> {
3836    let chunk_id_for_err = chunk_id.clone();
3837    let chunk_id_q = chunk_id.clone();
3838    let row: Option<(ExpandedChunk, i64)> = tenant
3839        .read()
3840        .interact(move |conn| {
3841            conn.query_row(
3842                "SELECT c.chunk_id, c.chunk_index, c.content, c.created_at_ms
3843                   FROM document_chunks c
3844                   JOIN documents d ON d.doc_id = c.doc_id
3845                  WHERE c.chunk_id = ?1
3846                    AND d.status = 'active'",
3847                rusqlite::params![&chunk_id_q],
3848                |r| {
3849                    Ok((
3850                        ExpandedChunk {
3851                            chunk_id: r.get(0)?,
3852                            chunk_index: r.get(1)?,
3853                            content: r.get(2)?,
3854                        },
3855                        r.get::<_, i64>(3)?,
3856                    ))
3857                },
3858            )
3859            .map(Some)
3860            .or_else(|e| match e {
3861                rusqlite::Error::QueryReturnedNoRows => Ok(None),
3862                other => Err(other),
3863            })
3864        })
3865        .await
3866        .map_err(ApiError::from)?;
3867
3868    let (chunk, created_at_ms) = row.ok_or_else(|| {
3869        ApiError::not_found(format!(
3870            "node_id {node_id_full:?} (chunk_id {chunk_id_for_err}) not found in current tenant"
3871        ))
3872    })?;
3873
3874    let full_text = Some(chunk.content.clone());
3875    let mut node = graph_node_for_chunk(tenant_id, &chunk);
3876    // Mirror the `/v1/graph/nodes` chunk-row behaviour: surface
3877    // `created_at_ms` so the inspector panel has a sortable timestamp.
3878    node.ts_ms = Some(created_at_ms);
3879
3880    Ok(GraphInspectResponse {
3881        node,
3882        full_text,
3883        triples_in: Vec::new(),
3884        triples_out: Vec::new(),
3885    })
3886}
3887
3888async fn inspect_cluster_node(
3889    tenant: &TenantHandle,
3890    tenant_id: &str,
3891    cluster_id: String,
3892    node_id_full: String,
3893) -> Result<GraphInspectResponse, ApiError> {
3894    let cluster_id_for_err = cluster_id.clone();
3895    let cluster_id_q = cluster_id.clone();
3896    let row: Option<(Option<String>, i64)> = tenant
3897        .read()
3898        .interact(move |conn| {
3899            conn.query_row(
3900                "SELECT sa.content, c.created_at_ms
3901                   FROM clusters c
3902                   LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
3903                  WHERE c.cluster_id = ?1",
3904                rusqlite::params![&cluster_id_q],
3905                |r| Ok((r.get::<_, Option<String>>(0)?, r.get::<_, i64>(1)?)),
3906            )
3907            .map(Some)
3908            .or_else(|e| match e {
3909                rusqlite::Error::QueryReturnedNoRows => Ok(None),
3910                other => Err(other),
3911            })
3912        })
3913        .await
3914        .map_err(ApiError::from)?;
3915
3916    let (abstraction, created_at_ms) = row.ok_or_else(|| {
3917        ApiError::not_found(format!(
3918            "node_id {node_id_full:?} (cluster_id {cluster_id_for_err}) not found in current tenant"
3919        ))
3920    })?;
3921
3922    // full_text is "<cluster_id label>\n\n<abstraction>" when an abstraction
3923    // exists; just the label otherwise. Brief "cluster" -- the cluster
3924    // label is `clusters.cluster_id` (the user-facing label is the
3925    // abstraction; clusters don't have a `label` column).
3926    let full_text = match abstraction.as_deref() {
3927        Some(a) => Some(format!("cluster {cluster_id_for_err}\n\n{a}")),
3928        None => Some(format!("cluster {cluster_id_for_err}")),
3929    };
3930
3931    Ok(GraphInspectResponse {
3932        node: graph_node_for_cluster(
3933            tenant_id,
3934            &cluster_id_for_err,
3935            abstraction.as_deref(),
3936            created_at_ms,
3937        ),
3938        full_text,
3939        triples_in: Vec::new(),
3940        triples_out: Vec::new(),
3941    })
3942}
3943
3944async fn inspect_entity_node(
3945    tenant: &TenantHandle,
3946    tenant_id: &str,
3947    entity_value: String,
3948    node_id_full: String,
3949) -> Result<GraphInspectResponse, ApiError> {
3950    // Entities are synthetic. They "exist" only if at least one triple
3951    // references them as subject or object. Zero triples -> 404 per brief.
3952    let entity_q = entity_value.clone();
3953    let rows: Vec<TripleRow> = tenant
3954        .read()
3955        .interact(move |conn| {
3956            let mut stmt = conn.prepare(
3957                "SELECT subject_id, predicate, object_id, confidence
3958                   FROM triples
3959                  WHERE (subject_id = ?1 OR object_id = ?1)
3960                    AND status = 'active'
3961                  ORDER BY valid_from_ms DESC
3962                  LIMIT ?2",
3963            )?;
3964            stmt.query_map(
3965                rusqlite::params![&entity_q, GRAPH_INSPECT_ENTITY_TRIPLES_CAP],
3966                |r| {
3967                    Ok(TripleRow {
3968                        subject_id: r.get(0)?,
3969                        predicate: r.get(1)?,
3970                        object_id: r.get(2)?,
3971                        confidence: r.get(3)?,
3972                    })
3973                },
3974            )?
3975            .collect::<rusqlite::Result<Vec<_>>>()
3976        })
3977        .await
3978        .map_err(ApiError::from)?;
3979
3980    if rows.is_empty() {
3981        return Err(ApiError::not_found(format!(
3982            "node_id {node_id_full:?} (entity {entity_value:?}) not found in current tenant -- entities must be referenced by at least one triple to be inspectable"
3983        )));
3984    }
3985
3986    // Triples flow out FROM the entity to its counterpart. For each row
3987    // determine which side the entity appears on and emit ent:<self> ->
3988    // ent:<other>. Brief calls these triples_out (entities don't have
3989    // structural triples_in in v0.10.0 P1).
3990    let mut triples_out = Vec::with_capacity(rows.len());
3991    for t in rows {
3992        let other = if t.subject_id == entity_value {
3993            t.object_id
3994        } else {
3995            // entity_value matched on object_id; counterpart is subject.
3996            t.subject_id
3997        };
3998        let tgt_id = format!("ent:{other}");
3999        triples_out.push(GraphEdge {
4000            id: edge_id(&node_id_full, "triple", &tgt_id),
4001            source: node_id_full.clone(),
4002            target: tgt_id,
4003            kind: "triple",
4004            predicate: Some(t.predicate),
4005            weight: Some(t.confidence),
4006        });
4007    }
4008
4009    Ok(GraphInspectResponse {
4010        node: graph_node_for_entity(tenant_id, &entity_value),
4011        full_text: None,
4012        triples_in: Vec::new(),
4013        triples_out,
4014    })
4015}
4016
4017// ---------------------------------------------------------------------------
4018// Graph neighbors -- unified explicit + HNSW-semantic (v0.10.0)
4019//
4020// `GET /v1/graph/neighbors/{id}` powers solo-web's "show similar" overlay.
4021// Returns the same `GraphResponse { nodes, edges }` envelope as the rest of
4022// the family, combining:
4023//
4024//   * Explicit edges (triples / document_chunk / cluster_member) incident
4025//     to the focal node -- the same shape `/v1/graph/expand` produces for
4026//     a given (node_id, edge_kind) pair, but UNIONed across every edge kind
4027//     compatible with the focal node's kind.
4028//
4029//   * HNSW-semantic edges (cosine-similarity neighbors) -- only valid for
4030//     `ep:` (episodes) and `chunk:` (chunks); other source kinds return
4031//     400 when `kind=semantic` is requested alone, or are silently skipped
4032//     when `kind=both` is requested (explicit-only path still runs).
4033//
4034// Why this isn't just expand-with-a-flag: `/v1/graph/expand` takes a
4035// specific `kind=<edge-kind>` parameter and expands along ONE edge kind at
4036// a time. `/v1/graph/neighbors/:id` UNIFIES all compatible edge kinds
4037// incident to the focal node into one response. Different UX (drill vs.
4038// overview); different API; both needed.
4039//
4040// ## Refactor decision
4041//
4042// The brief recommends extracting `expand`'s per-kind helpers into a
4043// shared module. In practice the `expand_*` async fns already do exactly
4044// what neighbors needs for the explicit path (same response shape, same
4045// tenant + auth + existence semantics). To keep the change surgical and
4046// to preserve `expand`'s existing tests byte-for-byte, neighbors **reuses
4047// the existing `expand_*` async fns directly** rather than refactoring
4048// their bodies. The explicit path is a thin orchestrator that calls every
4049// `expand_*` fn compatible with the focal node's kind and concatenates
4050// the results.
4051//
4052// ## Dedup rule (kind=both)
4053//
4054// When an edge with the same (source, target) appears in BOTH the
4055// explicit and the semantic result sets, the explicit edge wins -- the
4056// semantic edge is dropped. We dedupe by `(source, target)` (NOT by full
4057// edge id, which encodes the kind too): the rule "explicit beats
4058// semantic" only makes sense when both endpoints agree, regardless of
4059// kind. In practice this is most likely to fire when an entity-focused
4060// expand (which surfaces episodes as triple-targets) collides with a
4061// semantic search hit on the same episode pair.
4062//
4063// ## Limit policy
4064//
4065// `limit` is applied PER KIND, not total. With `limit=25` and
4066// `kind=both`, the response carries up to 25 explicit + 25 semantic
4067// edges (minus dedupe). Silent clamp at 100 (matches the rest of the
4068// `/v1/graph/*` family).
4069//
4070// ## Threshold filter
4071//
4072// `threshold` (default 0.75) filters semantic neighbors by
4073// `weight >= threshold`, where `weight = (1 - cos_distance).max(0)`. The
4074// default is conservative -- below 0.75 the renderer typically shows too
4075// many spurious edges for a useful "show similar" overlay. Callers can
4076// dial down (e.g. `?threshold=0.5`) for a broader view.
4077//
4078// See `docs/dev-log/0116-graph-neighbors-impl.md` for the design notes.
4079// ---------------------------------------------------------------------------
4080
/// Per-kind neighbor count used when the request carries no `?limit=`.
/// Kept small so the "show similar" overlay is not visually overwhelming
/// on the first click.
const GRAPH_NEIGHBORS_DEFAULT_LIMIT: u32 = 25;
/// Ceiling that larger `?limit=` values are silently clamped to, matching
/// the rest of the `/v1/graph/*` family.
const GRAPH_NEIGHBORS_MAX_LIMIT: u32 = 100;
/// Similarity floor used when the request carries no `?threshold=`.
/// Semantic edges with `weight < threshold` are dropped from the result.
const GRAPH_NEIGHBORS_DEFAULT_THRESHOLD: f32 = 0.75;
4089
4090/// Discriminator for which neighbor kinds the caller wants. Default is
4091/// `both` (explicit edges + HNSW-semantic).
4092#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
4093#[serde(rename_all = "snake_case")]
4094enum GraphNeighborsKind {
4095    Explicit,
4096    Semantic,
4097    #[default]
4098    Both,
4099}
4100
4101#[derive(Debug, Deserialize)]
4102struct GraphNeighborsQuery {
4103    #[serde(default)]
4104    kind: Option<GraphNeighborsKind>,
4105    #[serde(default)]
4106    threshold: Option<f32>,
4107    #[serde(default)]
4108    limit: Option<u32>,
4109}
4110
4111/// `GET /v1/graph/neighbors/{id}`. See module-level comments.
4112async fn graph_neighbors_handler(
4113    TenantExtractor(tenant): TenantExtractor,
4114    Path(id): Path<String>,
4115    Query(q): Query<GraphNeighborsQuery>,
4116) -> Result<Json<GraphExpandResponse>, ApiError> {
4117    let kind = q.kind.unwrap_or_default();
4118    let threshold = q.threshold.unwrap_or(GRAPH_NEIGHBORS_DEFAULT_THRESHOLD);
4119    if !(0.0..=1.0).contains(&threshold) {
4120        return Err(ApiError::bad_request(format!(
4121            "threshold must be in [0.0, 1.0]; got {threshold}"
4122        )));
4123    }
4124    // Silent clamp at GRAPH_NEIGHBORS_MAX_LIMIT -- matches expand /
4125    // nodes / edges convention. Test `neighbors_limit_clamped_at_100`
4126    // locks in the clamp policy.
4127    let limit_raw = q.limit.unwrap_or(GRAPH_NEIGHBORS_DEFAULT_LIMIT);
4128    let limit = limit_raw.clamp(1, GRAPH_NEIGHBORS_MAX_LIMIT);
4129
4130    let (node_kind, value) = parse_node_id(&id)?;
4131    let value_owned = value.to_string();
4132    let tenant_id_str = tenant.tenant_id().to_string();
4133    let node_id_full = id;
4134
4135    // Existence probe for the focal node. The explicit + semantic paths
4136    // each handle "node-found-but-zero-neighbors" gracefully (200 with
4137    // empty arrays) -- but we want a true 404 when the id resolves to no
4138    // row at all, regardless of which kind the caller asked for. This
4139    // matches the inspect endpoint's gate: a node has to exist to be
4140    // meaningfully "neighborable".
4141    ensure_neighbors_focal_exists(&tenant, node_kind, &value_owned, &node_id_full).await?;
4142
4143    // Dispatch.
4144    let (explicit_nodes, explicit_edges) = if matches!(
4145        kind,
4146        GraphNeighborsKind::Explicit | GraphNeighborsKind::Both
4147    ) {
4148        neighbors_explicit(
4149            &tenant,
4150            &tenant_id_str,
4151            node_kind,
4152            &value_owned,
4153            &node_id_full,
4154            limit as i64,
4155        )
4156        .await?
4157    } else {
4158        (Vec::new(), Vec::new())
4159    };
4160
4161    let (semantic_nodes, semantic_edges) = if matches!(
4162        kind,
4163        GraphNeighborsKind::Semantic | GraphNeighborsKind::Both
4164    ) {
4165        match neighbors_semantic(
4166            &tenant,
4167            &tenant_id_str,
4168            node_kind,
4169            &value_owned,
4170            &node_id_full,
4171            limit,
4172            threshold,
4173        )
4174        .await
4175        {
4176            Ok(parts) => parts,
4177            Err(e) => {
4178                // `kind=semantic` alone against an unsupported focal node
4179                // (doc/cl/ent) is a hard 400 -- the caller asked for ONLY
4180                // semantic neighbors and there are none possible.
4181                //
4182                // `kind=both` against an unsupported focal node silently
4183                // skips the semantic step; the explicit path still
4184                // delivers a meaningful answer. This mirrors the
4185                // pragmatic UX: clicking "show similar" on an entity
4186                // still surfaces the entity's triples without surfacing a
4187                // pointless error.
4188                if matches!(kind, GraphNeighborsKind::Semantic) {
4189                    return Err(e);
4190                }
4191                (Vec::new(), Vec::new())
4192            }
4193        }
4194    } else {
4195        (Vec::new(), Vec::new())
4196    };
4197
4198    // Merge + dedupe. Explicit edges win over semantic edges with the
4199    // same (source, target). Nodes dedupe by id.
4200    let mut explicit_endpoints: std::collections::HashSet<(String, String)> =
4201        std::collections::HashSet::with_capacity(explicit_edges.len());
4202    for e in &explicit_edges {
4203        explicit_endpoints.insert((e.source.clone(), e.target.clone()));
4204    }
4205
4206    let mut nodes: Vec<GraphNode> = Vec::with_capacity(explicit_nodes.len() + semantic_nodes.len());
4207    let mut edges: Vec<GraphEdge> =
4208        Vec::with_capacity(explicit_edges.len() + semantic_edges.len());
4209    let mut seen_node_ids: std::collections::HashSet<String> =
4210        std::collections::HashSet::with_capacity(explicit_nodes.len() + semantic_nodes.len());
4211
4212    for n in explicit_nodes {
4213        if seen_node_ids.insert(n.id.clone()) {
4214            nodes.push(n);
4215        }
4216    }
4217    for e in explicit_edges {
4218        edges.push(e);
4219    }
4220    for n in semantic_nodes {
4221        if seen_node_ids.insert(n.id.clone()) {
4222            nodes.push(n);
4223        }
4224    }
4225    for e in semantic_edges {
4226        if explicit_endpoints.contains(&(e.source.clone(), e.target.clone())) {
4227            // Explicit edge already covers this pair -- drop the semantic
4228            // duplicate per the dedup rule. The semantic node may still
4229            // remain in `nodes` if no other edge already pulled it in;
4230            // that's fine -- the renderer renders nodes with weight-less
4231            // structural edges either way.
4232            continue;
4233        }
4234        edges.push(e);
4235    }
4236
4237    Ok(Json(GraphExpandResponse { nodes, edges }))
4238}
4239
4240/// Existence probe for the focal node. Translates the prefixed id into a
4241/// per-kind COUNT query against the matching table. Returns 404 (not 200
4242/// with empty arrays) when the node doesn't exist in the tenant's DB.
4243/// For entities the "existence" check is "is this entity referenced by
4244/// at least one triple" -- consistent with the inspect-entity contract
4245/// from `0115`.
4246async fn ensure_neighbors_focal_exists(
4247    tenant: &TenantHandle,
4248    node_kind: NodeKind,
4249    value: &str,
4250    node_id_full: &str,
4251) -> Result<(), ApiError> {
4252    match node_kind {
4253        NodeKind::Episode => ensure_episode_exists(tenant, value, node_id_full).await,
4254        NodeKind::Cluster => ensure_cluster_exists(tenant, value, node_id_full).await,
4255        NodeKind::Document => ensure_document_exists(tenant, value, node_id_full).await,
4256        NodeKind::Chunk => ensure_chunk_exists(tenant, value, node_id_full).await,
4257        NodeKind::Entity => ensure_entity_referenced(tenant, value, node_id_full).await,
4258    }
4259}
4260
4261/// 404 if the chunk_id has no row in this tenant's `document_chunks`
4262/// table whose parent doc is active. Mirrors `ensure_*_exists` from
4263/// `expand`.
4264async fn ensure_chunk_exists(
4265    tenant: &TenantHandle,
4266    chunk_id: &str,
4267    node_id_full: &str,
4268) -> Result<(), ApiError> {
4269    let chunk_id_q = chunk_id.to_string();
4270    let exists: i64 = tenant
4271        .read()
4272        .interact(move |conn| {
4273            conn.query_row(
4274                "SELECT COUNT(*)
4275                   FROM document_chunks c
4276                   JOIN documents d ON d.doc_id = c.doc_id
4277                  WHERE c.chunk_id = ?1
4278                    AND d.status = 'active'",
4279                rusqlite::params![&chunk_id_q],
4280                |r| r.get(0),
4281            )
4282        })
4283        .await
4284        .map_err(ApiError::from)?;
4285    if exists == 0 {
4286        return Err(ApiError::not_found(format!(
4287            "node_id {node_id_full:?} not found in current tenant"
4288        )));
4289    }
4290    Ok(())
4291}
4292
4293/// 404 if the entity isn't referenced by at least one active triple in
4294/// the tenant. Matches the inspect-entity 404 contract: entities are
4295/// synthetic, "existence" is "shows up in at least one triple".
4296async fn ensure_entity_referenced(
4297    tenant: &TenantHandle,
4298    entity_value: &str,
4299    node_id_full: &str,
4300) -> Result<(), ApiError> {
4301    let entity_q = entity_value.to_string();
4302    let exists: i64 = tenant
4303        .read()
4304        .interact(move |conn| {
4305            conn.query_row(
4306                "SELECT COUNT(*)
4307                   FROM triples
4308                  WHERE (subject_id = ?1 OR object_id = ?1)
4309                    AND status = 'active'",
4310                rusqlite::params![&entity_q],
4311                |r| r.get(0),
4312            )
4313        })
4314        .await
4315        .map_err(ApiError::from)?;
4316    if exists == 0 {
4317        return Err(ApiError::not_found(format!(
4318            "node_id {node_id_full:?} (entity {entity_value:?}) not found in current tenant -- entities must be referenced by at least one triple to be neighborable"
4319        )));
4320    }
4321    Ok(())
4322}
4323
4324/// Explicit-neighbor path. Dispatches per focal node kind, calling the
4325/// existing `expand_*` async fns for each compatible edge kind and
4326/// concatenating the results. This is the "reuse" refactor decision:
4327/// no duplication of expand's SQL, and expand's tests stay byte-for-byte
4328/// intact because we don't touch its bodies.
4329async fn neighbors_explicit(
4330    tenant: &TenantHandle,
4331    tenant_id: &str,
4332    node_kind: NodeKind,
4333    value: &str,
4334    node_id_full: &str,
4335    limit: i64,
4336) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
4337    let mut nodes: Vec<GraphNode> = Vec::new();
4338    let mut edges: Vec<GraphEdge> = Vec::new();
4339
4340    match node_kind {
4341        NodeKind::Episode => {
4342            // Episodes have two compatible explicit-edge kinds:
4343            //   * cluster_member (episode -> clusters)
4344            //   * triple (episode -> entities, plus subj/obj entity pairs)
4345            //
4346            // document_chunk doesn't apply (episodes aren't documents).
4347            // Run each path, concat. Per-kind limit -- the caller asked for
4348            // up to `limit` neighbors PER KIND.
4349            let r1 = expand_cluster_member(tenant, tenant_id, node_kind, value, node_id_full, limit)
4350                .await?;
4351            nodes.extend(r1.nodes);
4352            edges.extend(r1.edges);
4353            let r2 =
4354                expand_triple(tenant, tenant_id, node_kind, value, node_id_full, limit).await?;
4355            nodes.extend(r2.nodes);
4356            edges.extend(r2.edges);
4357        }
4358        NodeKind::Document => {
4359            // Documents have one compatible explicit-edge kind:
4360            // document_chunk (document -> chunks).
4361            let r = expand_document_chunk(tenant, tenant_id, node_kind, value, node_id_full, limit)
4362                .await?;
4363            nodes.extend(r.nodes);
4364            edges.extend(r.edges);
4365        }
4366        NodeKind::Chunk => {
4367            // Chunks have one compatible explicit-edge kind:
4368            // document_chunk (chunk -> parent document).
4369            let r = expand_document_chunk(tenant, tenant_id, node_kind, value, node_id_full, limit)
4370                .await?;
4371            nodes.extend(r.nodes);
4372            edges.extend(r.edges);
4373        }
4374        NodeKind::Cluster => {
4375            // Clusters have one compatible explicit-edge kind:
4376            // cluster_member (cluster -> episodes).
4377            let r = expand_cluster_member(tenant, tenant_id, node_kind, value, node_id_full, limit)
4378                .await?;
4379            nodes.extend(r.nodes);
4380            edges.extend(r.edges);
4381        }
4382        NodeKind::Entity => {
4383            // Entities have one compatible explicit-edge kind:
4384            // triple (entity -> episodes where this entity is referenced).
4385            let r =
4386                expand_triple(tenant, tenant_id, node_kind, value, node_id_full, limit).await?;
4387            nodes.extend(r.nodes);
4388            edges.extend(r.edges);
4389        }
4390    }
4391    Ok((nodes, edges))
4392}
4393
4394/// Semantic-neighbor path. Only valid for episode + chunk focal nodes;
4395/// other kinds return 400. Reuses the existing inner pipelines:
4396///
4397///   * Episodes -> `solo_query::recall::run_recall_inner` (same path
4398///     `expand_semantic` uses; filters out chunk hits).
4399///   * Chunks   -> `solo_query::doc_search::run_doc_search_inner` (the
4400///     equivalent chunk-restricted vector pipeline).
4401///
4402/// Re-embed the focal node's content for the HNSW query rather than
4403/// loading the persisted vector from `embeddings` -- the same trade-off
4404/// `expand_semantic` made: cheaper code path overall, with deterministic
4405/// embedders in tests + batch-sized embedders in prod making the recompute
4406/// cost negligible.
4407async fn neighbors_semantic(
4408    tenant: &TenantHandle,
4409    tenant_id: &str,
4410    node_kind: NodeKind,
4411    value: &str,
4412    node_id_full: &str,
4413    limit: u32,
4414    threshold: f32,
4415) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
4416    match node_kind {
4417        NodeKind::Episode => {
4418            neighbors_semantic_from_episode(
4419                tenant,
4420                tenant_id,
4421                value,
4422                node_id_full,
4423                limit,
4424                threshold,
4425            )
4426            .await
4427        }
4428        NodeKind::Chunk => {
4429            neighbors_semantic_from_chunk(
4430                tenant,
4431                tenant_id,
4432                value,
4433                node_id_full,
4434                limit,
4435                threshold,
4436            )
4437            .await
4438        }
4439        _ => Err(ApiError::bad_request(format!(
4440            "semantic neighbors only valid for episode or chunk source; got {}",
4441            node_kind.as_wire_str()
4442        ))),
4443    }
4444}
4445
4446async fn neighbors_semantic_from_episode(
4447    tenant: &TenantHandle,
4448    tenant_id: &str,
4449    memory_id: &str,
4450    node_id_full: &str,
4451    limit: u32,
4452    threshold: f32,
4453) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
4454    let memory_id_q = memory_id.to_string();
4455    let memory_id_for_self_excl = memory_id.to_string();
4456    let content: Option<String> = tenant
4457        .read()
4458        .interact(move |conn| {
4459            conn.query_row(
4460                "SELECT content FROM episodes WHERE memory_id = ?1 AND status = 'active'",
4461                rusqlite::params![&memory_id_q],
4462                |r| r.get::<_, String>(0),
4463            )
4464            .map(Some)
4465            .or_else(|e| match e {
4466                rusqlite::Error::QueryReturnedNoRows => Ok(None),
4467                other => Err(other),
4468            })
4469        })
4470        .await
4471        .map_err(ApiError::from)?;
4472
4473    // Existence is guaranteed by the focal-exists probe earlier; an
4474    // empty content here would be a status-transition race we treat as
4475    // "nothing to compare against".
4476    let Some(content) = content else {
4477        return Ok((Vec::new(), Vec::new()));
4478    };
4479
4480    // Widen the request by 1 so dropping self doesn't shrink the page.
4481    let widened = (limit as usize).saturating_add(1).min(100);
4482    let result = solo_query::recall::run_recall_inner(
4483        tenant.embedder(),
4484        tenant.hnsw(),
4485        tenant.read(),
4486        &content,
4487        widened,
4488    )
4489    .await
4490    .map_err(ApiError::from)?;
4491
4492    let mut nodes = Vec::new();
4493    let mut edges = Vec::new();
4494    for hit in result.hits.into_iter() {
4495        if hit.memory_id == memory_id_for_self_excl {
4496            // Skip self.
4497            continue;
4498        }
4499        if nodes.len() as u32 >= limit {
4500            break;
4501        }
4502        let weight = (1.0 - hit.cos_distance).max(0.0);
4503        if weight < threshold {
4504            continue;
4505        }
4506        let target_id = format!("ep:{}", hit.memory_id);
4507        edges.push(GraphEdge {
4508            id: edge_id(node_id_full, "semantic", &target_id),
4509            source: node_id_full.to_string(),
4510            target: target_id,
4511            kind: "semantic",
4512            predicate: None,
4513            weight: Some(weight),
4514        });
4515        nodes.push(GraphNode {
4516            id: format!("ep:{}", hit.memory_id),
4517            kind: NodeKind::Episode.as_wire_str(),
4518            label: episode_label(&hit.content),
4519            ts_ms: None,
4520            tenant_id: tenant_id.to_string(),
4521            preview: Some(truncate_preview(&hit.content, GRAPH_PREVIEW_CHARS)),
4522        });
4523    }
4524    Ok((nodes, edges))
4525}
4526
4527async fn neighbors_semantic_from_chunk(
4528    tenant: &TenantHandle,
4529    tenant_id: &str,
4530    chunk_id: &str,
4531    node_id_full: &str,
4532    limit: u32,
4533    threshold: f32,
4534) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
4535    let chunk_id_q = chunk_id.to_string();
4536    let chunk_id_for_self_excl = chunk_id.to_string();
4537    let content: Option<String> = tenant
4538        .read()
4539        .interact(move |conn| {
4540            conn.query_row(
4541                "SELECT c.content
4542                   FROM document_chunks c
4543                   JOIN documents d ON d.doc_id = c.doc_id
4544                  WHERE c.chunk_id = ?1
4545                    AND d.status = 'active'",
4546                rusqlite::params![&chunk_id_q],
4547                |r| r.get::<_, String>(0),
4548            )
4549            .map(Some)
4550            .or_else(|e| match e {
4551                rusqlite::Error::QueryReturnedNoRows => Ok(None),
4552                other => Err(other),
4553            })
4554        })
4555        .await
4556        .map_err(ApiError::from)?;
4557
4558    let Some(content) = content else {
4559        return Ok((Vec::new(), Vec::new()));
4560    };
4561
4562    let widened = (limit as usize).saturating_add(1).min(100);
4563    let hits = solo_query::doc_search::run_doc_search_inner(
4564        tenant.embedder(),
4565        tenant.hnsw(),
4566        tenant.read(),
4567        &content,
4568        widened,
4569    )
4570    .await
4571    .map_err(ApiError::from)?;
4572
4573    let mut nodes = Vec::new();
4574    let mut edges = Vec::new();
4575    for hit in hits.into_iter() {
4576        if hit.chunk_id == chunk_id_for_self_excl {
4577            continue;
4578        }
4579        if nodes.len() as u32 >= limit {
4580            break;
4581        }
4582        let weight = (1.0 - hit.cos_distance).max(0.0);
4583        if weight < threshold {
4584            continue;
4585        }
4586        let target_id = format!("chunk:{}", hit.chunk_id);
4587        edges.push(GraphEdge {
4588            id: edge_id(node_id_full, "semantic", &target_id),
4589            source: node_id_full.to_string(),
4590            target: target_id,
4591            kind: "semantic",
4592            predicate: None,
4593            weight: Some(weight),
4594        });
4595        let exp = ExpandedChunk {
4596            chunk_id: hit.chunk_id.clone(),
4597            chunk_index: hit.chunk_index as i64,
4598            content: hit.content.clone(),
4599        };
4600        nodes.push(graph_node_for_chunk(tenant_id, &exp));
4601    }
4602    Ok((nodes, edges))
4603}
4604
4605// ---------------------------------------------------------------------------
4606// /v1/graph/stream — SSE invalidation feed (v0.10.0)
4607//
4608// Powers solo-web's live-update behaviour: instead of polling, the
4609// frontend subscribes once and refetches its pages only when the
4610// writer-actor signals "your tenant's data changed". Per scoping doc
4611// §3 Decision C, the wire format is invalidation-shaped (not row
4612// payload) — the SSE channel says "refetch the affected page" rather
4613// than streaming actual rows.
4614//
4615// Wire format:
4616//
4617//   ```
4618//   event: init
4619//   data: {"connected": true, "tenant_id": "default", "ts_ms": 1715625600000}
4620//
4621//   event: invalidate
4622//   data: {"reason": "memory.remember", "tenant_id": "default",
4623//          "ts_ms": 1715625610000, "kind": "episode"}
4624//
4625//   event: heartbeat
4626//   data: {"ts_ms": 1715625640000}
4627//   ```
4628//
4629// Heartbeat: every [`STREAM_HEARTBEAT_SECS`] seconds, regardless of
4630// whether real events fired (simpler than resetting the timer on every
4631// invalidate; the cost is a few extra bytes per minute on idle).
4632//
// Lagged subscribers (subscriber polled slower than 256 writes) cause
// one `tracing::warn!` per reported lag and resync via the next real
// `invalidate` — invalidation events are idempotent, so the missed batch
// reduces to a single refetch on the client side. No correctness loss.
4637//
4638// See `docs/dev-log/0117-graph-stream-impl.md` for the full design.
4639// ---------------------------------------------------------------------------
4640
/// Heartbeat period, in seconds, for `/v1/graph/stream`.
///
/// The heartbeat fires on a fixed cadence and is deliberately NOT reset
/// when an `invalidate` event goes out — that is easier to reason about
/// than "fire 30s after the last event", and keeps proxies happy without
/// code that races a reset on every invalidate.
pub const STREAM_HEARTBEAT_SECS: u64 = 30;

/// SSE `event:` name for the single event sent when the connection
/// opens. Clients use it to confirm the subscription is live.
const STREAM_EVENT_INIT: &str = "init";

/// SSE `event:` name emitted on every writer-actor commit (and on
/// `gdpr.forget_user`'s non-writer-actor cascade).
const STREAM_EVENT_INVALIDATE: &str = "invalidate";

/// SSE `event:` name emitted on each heartbeat-interval tick.
const STREAM_EVENT_HEARTBEAT: &str = "heartbeat";
4657
4658/// `GET /v1/graph/stream` — Server-Sent Events feed of
4659/// `InvalidateEvent`s scoped to the request's tenant.
4660///
4661/// Subscribes to the per-tenant `broadcast::Sender<InvalidateEvent>`
4662/// held by `TenantHandle` (populated by `TenantHandle::open`). The
4663/// stream:
4664///
4665///   1. Emits one `event: init` line at connection open.
4666///   2. Selects between (broadcast recv) and (heartbeat tick) in a
4667///      loop, emitting `invalidate` / `heartbeat` events as either
4668///      fires.
4669///   3. Exits when the client closes the connection (axum drops the
4670///      response future) OR the broadcast Sender is dropped (tenant
4671///      shutdown).
4672///
4673/// Auth + tenant resolution mirror the rest of `/v1/graph/*`: the
4674/// `auth_middleware` returns 401 on missing bearer; the
4675/// `TenantExtractor` resolves the per-tenant DB. The handler itself
4676/// has no per-route auth logic.
4677async fn graph_stream_handler(
4678    TenantExtractor(tenant): TenantExtractor,
4679) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
4680    // Subscribe BEFORE building the init event so a writer-actor
4681    // commit that lands in the (microscopic) window between init and
4682    // the first poll is still observed. `broadcast::Receiver` buffers
4683    // up to the channel's capacity from the moment of subscribe.
4684    let rx = tenant.invalidate_sender().subscribe();
4685    let tenant_id = tenant.tenant_id().to_string();
4686    let stream = build_invalidate_stream(rx, tenant_id, STREAM_HEARTBEAT_SECS);
4687    // axum's keep-alive layer adds its own `:` comment line every
4688    // configured interval; we keep that OFF and ship our own typed
4689    // `heartbeat` event instead. The client distinguishes the two by
4690    // looking at the SSE `event:` field — typed heartbeats let solo-web
4691    // surface "connection healthy" in its UI without parsing comment
4692    // lines.
4693    Sse::new(stream).keep_alive(KeepAlive::new().interval(Duration::from_secs(3600)))
4694}
4695
/// Per-subscriber state threaded through `futures::stream::unfold`.
/// Carries the receiver + heartbeat interval + a one-shot flag for
/// the initial `init` event. One value exists per open SSE connection.
struct StreamState {
    /// Receiving half of the tenant's invalidation broadcast channel.
    rx: broadcast::Receiver<InvalidateEvent>,
    /// Timer whose ticks are turned into `heartbeat` events.
    heartbeat: tokio::time::Interval,
    /// Tenant id echoed back in the `init` event payload.
    tenant_id: String,
    /// `true` until the first poll completes — used to gate the `init`
    /// event. Flipped to `false` just before the init event is yielded.
    needs_init: bool,
}
4707
4708/// Build the stream of SSE [`Event`]s for one subscriber.
4709///
4710/// First yield is the `init` event. After that, the stream selects
4711/// between the broadcast receiver and a tokio interval timer that
4712/// fires every `heartbeat_secs` seconds. Lagged broadcast errors are
4713/// swallowed with a single `tracing::warn!` line — the client resyncs
4714/// on the next real invalidate (invalidation events are idempotent).
4715fn build_invalidate_stream(
4716    rx: broadcast::Receiver<InvalidateEvent>,
4717    tenant_id: String,
4718    heartbeat_secs: u64,
4719) -> impl Stream<Item = Result<Event, Infallible>> {
4720    // `tokio::time::interval_at(start, period)` starts ticking at
4721    // `start`; we set `start = now + period` so the first heartbeat
4722    // lands `heartbeat_secs` AFTER the init event. Without `interval_at`
4723    // the default `interval()` would fire immediately at t=0, racing
4724    // the init event.
4725    let start_at = tokio::time::Instant::now() + Duration::from_secs(heartbeat_secs);
4726    let heartbeat =
4727        tokio::time::interval_at(start_at, Duration::from_secs(heartbeat_secs));
4728
4729    let state = StreamState {
4730        rx,
4731        heartbeat,
4732        tenant_id,
4733        needs_init: true,
4734    };
4735    futures::stream::unfold(state, move |mut state| async move {
4736        // First-poll: yield the init event without touching the
4737        // receiver or the heartbeat. Subsequent polls fall through to
4738        // the select loop.
4739        if state.needs_init {
4740            state.needs_init = false;
4741            let init_payload = serde_json::json!({
4742                "connected": true,
4743                "tenant_id": state.tenant_id,
4744                "ts_ms": chrono::Utc::now().timestamp_millis(),
4745            });
4746            let ev = Event::default()
4747                .event(STREAM_EVENT_INIT)
4748                .json_data(init_payload)
4749                .unwrap_or_else(|_| Event::default().event(STREAM_EVENT_INIT));
4750            return Some((Ok::<Event, Infallible>(ev), state));
4751        }
4752        loop {
4753            tokio::select! {
4754                event = state.rx.recv() => {
4755                    match event {
4756                        Ok(ev) => {
4757                            let sse_event = Event::default()
4758                                .event(STREAM_EVENT_INVALIDATE)
4759                                .json_data(&ev)
4760                                .unwrap_or_else(|_| Event::default()
4761                                    .event(STREAM_EVENT_INVALIDATE));
4762                            return Some((Ok::<Event, Infallible>(sse_event), state));
4763                        }
4764                        Err(broadcast::error::RecvError::Lagged(n)) => {
4765                            tracing::warn!(
4766                                lagged = n,
4767                                "graph stream subscriber lagged; client will \
4768                                 resync on the next real invalidate"
4769                            );
4770                            // Continue receiving — do NOT yield anything
4771                            // for a lag.
4772                        }
4773                        Err(broadcast::error::RecvError::Closed) => {
4774                            tracing::debug!(
4775                                "graph stream broadcast closed; ending SSE stream"
4776                            );
4777                            return None;
4778                        }
4779                    }
4780                }
4781                _ = state.heartbeat.tick() => {
4782                    let hb_payload = serde_json::json!({
4783                        "ts_ms": chrono::Utc::now().timestamp_millis(),
4784                    });
4785                    let sse_event = Event::default()
4786                        .event(STREAM_EVENT_HEARTBEAT)
4787                        .json_data(hb_payload)
4788                        .unwrap_or_else(|_| Event::default()
4789                            .event(STREAM_EVENT_HEARTBEAT));
4790                    return Some((Ok::<Event, Infallible>(sse_event), state));
4791                }
4792            }
4793        }
4794    })
4795}
4796
4797// ---------------------------------------------------------------------------
4798// /v1/tenants — principal-scoped tenant list (v0.10.0 + v0.10.1 hydration)
4799//
4800// Powers solo-web's top-bar tenant picker (Decision F in
4801// `docs/dev-log/0105-solo-web-scoping.md` §3, route shape locked in §4
4802// Route 6). The endpoint is **read-only**; admin CRUD (create / delete /
4803// rename / quota change) remains CLI-only per ADR-0004 §"Admin operations".
4804// That keeps the privileged tenant-mutation surface off HTTP entirely
4805// while still letting an authenticated browser session enumerate the
4806// tenants it's allowed to see.
4807//
4808// Wire shape (200 OK):
4809//
4810//   ```json
4811//   {
4812//     "tenants": [
4813//       {
4814//         "id": "default",
4815//         "display_name": "Default tenant",
4816//         "created_at_ms": 1715625600000,
4817//         "last_accessed_ms": 1715625900000,
4818//         "status": "active",
4819//         "quota_bytes": null,
4820//         "episode_count": null,
4821//         "size_bytes": null,
4822//         "pct_used": null
4823//       }
4824//     ]
4825//   }
4826//   ```
4827//
4828// The numeric `episode_count` / `size_bytes` / `pct_used` fields were
4829// **always `null` in v0.10.0** (cost-deferred). v0.10.1 hydrates them
4830// for real via `TenantRegistry::hydrate_tenant_cost_numbers`:
4831//
4832//   * `size_bytes` — `std::fs::metadata(<data_dir>/tenants/<db>.db).len()`.
4833//     Cheap; runs for every visible tenant.
4834//   * `episode_count` — `SELECT COUNT(*) FROM episodes WHERE
4835//     status='active'` against the per-tenant SQLCipher DB.
4836//   * `pct_used` — `size_bytes * 100 / quota_bytes` (f64, capped at
4837//     100.0) when both are known; `null` if `quota_bytes` is unset.
4838//
4839// **Cap**: opening + counting N tenant DBs is N×~10ms; the first-paint
4840// budget is tight, so we cap `episode_count` hydration at
4841// `TENANTS_COUNT_HYDRATION_CAP` (50) per request. Tenants beyond the
4842// cap get `episode_count: null` and the response carries an
4843// `X-Solo-Tenants-Count-Cap-Reached: true` header so clients can fetch
4844// counts for the tail tenants out-of-band if needed (mirroring the
4845// entity-cap pattern from `/v1/graph/nodes`). `size_bytes` is not
4846// capped — it's just a `metadata` call.
4847//
4848// The CLI's `solo tenants list` retains the canonical per-tenant
4849// cost-numbers path for operators who need exhaustive data.
4850//
4851// ## Visibility filter (load-bearing — three cases)
4852//
4853// The handler reads `AuthenticatedPrincipal` out of request extensions
4854// via `MaybePrincipal` and filters the registry list before
4855// serialisation:
4856//
4857//   1. **No principal** (`MaybePrincipal(None)`) — unauthenticated
4858//      loopback path, no `[auth]` block in `solo.config.toml`. Return
4859//      every `Active` tenant. Same scope as `solo tenants list` CLI.
4860//   2. **Bearer principal** (`subject == "bearer" && claims.is_null()`,
4861//      the `AuthenticatedPrincipal::bearer` signature emitted by
4862//      `BearerValidator::validate`). Single-principal daemon — the
4863//      bearer holder is the operator, so return every `Active`
4864//      tenant. Functionally equivalent to (1) from a leakage
4865//      standpoint.
4866//   3. **OIDC principal** (any other principal — `claims` carries the
4867//      JWT object). Filter to ONLY the tenant id matching
4868//      `principal.tenant_claim`. The configured OIDC tenant_claim is
4869//      already validated to a real `TenantId` by the auth middleware
4870//      (a `MissingTenantClaim` or `InvalidTenantClaim` shorts out at
4871//      403 BEFORE this handler runs). If the claim doesn't match any
4872//      registered tenant, return `{"tenants": []}` (200 OK, NOT 404)
4873//      — don't leak whether a tenant exists by 404'ing on names
4874//      outside the principal's scope.
4875//
4876// `PendingMigration` / `PendingDelete` tenants are **excluded** from the
4877// list in every case. solo-web's tenant picker should not surface a
4878// tenant that's mid-migration or queued for hard-delete — clicking
4879// such a row would race the admin tooling. The CLI's `solo tenants
4880// list` still shows them under an explicit `--include-pending` flag
4881// (out of scope here).
4882//
4883// See `docs/dev-log/0119-tenants-list-impl.md` for the full design.
4884// ---------------------------------------------------------------------------
4885
/// One row of the `/v1/tenants` response body. Shape mirrors
/// `solo_storage::TenantRecord` for the persisted fields plus the
/// cost-numbers triple (`episode_count`, `size_bytes`, `pct_used`).
/// The triple was always `null` in v0.10.0 and is hydrated per request
/// as of v0.10.1.
///
/// Fields tagged `skip_serializing_if` are left out of the JSON body
/// entirely when `None`; the cost-numbers triple is always present and
/// serialises as `null` when unknown.
#[derive(Debug, Clone, Serialize)]
struct TenantListItem {
    /// Tenant id (e.g. `"default"`, `"alice"`). Matches the
    /// `X-Solo-Tenant` header value clients send to other routes.
    id: String,
    /// Human-readable display name set at `solo tenants create`.
    /// `None` ⇒ omit from the JSON body.
    #[serde(skip_serializing_if = "Option::is_none")]
    display_name: Option<String>,
    /// Epoch ms when this tenant was registered.
    created_at_ms: i64,
    /// Epoch ms of the most recent `TenantRegistry::get_or_open` call
    /// (v0.9.0 P1). `None` for tenants that have never been opened
    /// since the migration ran; `None` ⇒ omit from the JSON body.
    #[serde(skip_serializing_if = "Option::is_none")]
    last_accessed_ms: Option<i64>,
    /// Lifecycle status. Always `"active"` in the v0.10.0 wire (we
    /// filter `PendingMigration` / `PendingDelete` out at list time).
    /// Surfaced for forward-compat — a future `?include_pending=1`
    /// query param could relax the filter without a shape change.
    status: TenantStatusJson,
    /// Per-tenant byte quota set via `solo tenants set-quota`. `None`
    /// ⇒ unlimited, and the key is omitted from the JSON body.
    #[serde(skip_serializing_if = "Option::is_none")]
    quota_bytes: Option<u64>,
    /// v0.10.1: count of `episodes WHERE status='active'`. Populated
    /// for the first `TENANTS_COUNT_HYDRATION_CAP` tenants in the
    /// response; `null` for tenants beyond the cap (in which case the
    /// response also carries `X-Solo-Tenants-Count-Cap-Reached: true`).
    /// Also `null` if the per-tenant DB file is missing or the COUNT
    /// failed.
    episode_count: Option<i64>,
    /// v0.10.1: size of the per-tenant SQLCipher DB on disk (bytes).
    /// `null` only if the file is missing or unreadable (corruption /
    /// permissions). Not affected by the cap — `std::fs::metadata` is
    /// cheap.
    size_bytes: Option<u64>,
    /// v0.10.1: `(size_bytes * 100.0 / quota_bytes)` capped at `100.0`
    /// when both `size_bytes` and `quota_bytes` are known. `null` if
    /// `quota_bytes` is unset (no quota = unlimited) or zero, or if
    /// `size_bytes` is unknown.
    pct_used: Option<f64>,
}
4933
/// JSON-side mirror of [`TenantStatus`]. Re-defined here (rather than
/// serialising `TenantStatus` directly — it already derives `Serialize`
/// with `#[serde(rename_all = "snake_case")]`) so the HTTP-side wire
/// shape stays decoupled from the storage-side enum: a future status
/// variant added to storage doesn't automatically leak onto the wire.
///
/// Only `Active` exists today because the list handler filters every
/// other status out before building a response row; it serialises as
/// `"active"`.
#[derive(Debug, Clone, Copy, Serialize)]
#[serde(rename_all = "snake_case")]
enum TenantStatusJson {
    Active,
}
4945
4946impl From<&solo_storage::TenantStatus> for TenantStatusJson {
4947    fn from(s: &solo_storage::TenantStatus) -> Self {
4948        // We only ever build this enum from `Active` records (the list
4949        // handler filters at source); the match exhausts so future
4950        // variants force a compile error here, not a wire mismatch.
4951        match s {
4952            solo_storage::TenantStatus::Active => TenantStatusJson::Active,
4953            // Defensive: should be filtered upstream. Map to Active to
4954            // avoid a panic, but the handler MUST keep filtering at
4955            // source. A clippy warning catches dead branches.
4956            solo_storage::TenantStatus::PendingMigration
4957            | solo_storage::TenantStatus::PendingDelete => TenantStatusJson::Active,
4958        }
4959    }
4960}
4961
/// Response body for `GET /v1/tenants`: a single top-level `tenants`
/// array, one [`TenantListItem`] per tenant visible to the caller.
#[derive(Debug, Serialize)]
struct TenantsListResponse {
    /// Visible tenants, in the order the handler produced them.
    tenants: Vec<TenantListItem>,
}
4967
/// v0.10.1: maximum number of tenants whose `episode_count` we hydrate
/// per `/v1/tenants` request. Opening + counting one tenant DB is
/// ~5-10ms; capping bounds the per-request wall time to keep solo-web's
/// first-paint budget tight. Tenants beyond the cap get
/// `episode_count: null` AND the response carries
/// `X-Solo-Tenants-Count-Cap-Reached: true` so clients can fetch
/// per-tenant counts out-of-band (CLI / future per-id endpoint) for
/// the tail. The 50 figure mirrors the entity-cap pattern from
/// `/v1/graph/nodes`.
const TENANTS_COUNT_HYDRATION_CAP: usize = 50;

/// v0.10.1: response header name set to `"true"` when the per-request
/// `episode_count` hydration cap was reached. Absent otherwise.
/// Grep-able by both server- and client-side code. Must stay lowercase:
/// it is passed to `axum::http::HeaderName::from_static` (header names
/// are case-insensitive on the wire; the canonical spelling is
/// `X-Solo-Tenants-Count-Cap-Reached`).
const X_SOLO_TENANTS_COUNT_CAP_HEADER: &str = "x-solo-tenants-count-cap-reached";
4986
4987/// `GET /v1/tenants` — list every tenant visible to the request's
4988/// principal. See module comment for the three-case visibility rule.
4989///
4990/// Errors:
4991///   * **401** — bearer required but missing/invalid (handled by
4992///     `auth_middleware` before this handler runs).
4993///   * **500** — `TenantsIndex` read failed. Surfaced via [`ApiError`].
4994///
4995/// No 404 path. If the OIDC principal's `tenant_claim` doesn't match
4996/// any registered tenant, the response is `200 OK` with `tenants:
4997/// []`. That keeps tenant existence out of side-channel range for an
4998/// OIDC user — they cannot probe for other tenants by id.
4999async fn tenants_list_handler(
5000    State(state): State<SoloHttpState>,
5001    MaybePrincipal(maybe_principal): MaybePrincipal,
5002) -> Result<Response, ApiError> {
5003    // Pull every registered tenant. `list_active` is the registry's
5004    // wrapper around `TenantsIndex::list`, which returns rows ordered
5005    // by `(created_at_ms ASC, tenant_id ASC)` — a stable order that
5006    // doesn't shift between requests, which solo-web relies on to keep
5007    // its tenant picker entries from reordering visually.
5008    let mut records = state.registry.list_active().await.map_err(ApiError::from)?;
5009
5010    // Filter at source: status MUST be Active (PendingMigration /
5011    // PendingDelete are admin-transient states that solo-web should
5012    // not surface). Matches the brief's
5013    // `tenants_status_filter_excludes_deleted` test.
5014    records.retain(|r| matches!(r.status, solo_storage::TenantStatus::Active));
5015
5016    // Apply the principal-driven visibility filter. The three cases
5017    // are exhaustive — see the module comment for the rationale on
5018    // each. `tenant_visibility_filter` is split out so the unit
5019    // tests can assert the rule independent of the SQL read.
5020    let filtered = filter_tenants_for_principal(records, maybe_principal.as_ref());
5021
5022    // v0.10.1: hydrate cost numbers (size_bytes, episode_count). The
5023    // registry helper handles missing DB files + the cap behavior. We
5024    // pass the cap so tenants beyond it return `None` for episode_count
5025    // — `size_bytes` is computed for everyone (cheap fs::metadata).
5026    let cap = TENANTS_COUNT_HYDRATION_CAP;
5027    let costs = state
5028        .registry
5029        .hydrate_tenant_cost_numbers(&filtered, cap)
5030        .await;
5031    let cap_reached = filtered.len() > cap;
5032
5033    let tenants: Vec<TenantListItem> = filtered
5034        .iter()
5035        .zip(costs.iter())
5036        .map(|(r, cost)| {
5037            let pct_used = match (cost.size_bytes, r.quota_bytes) {
5038                (Some(size), Some(quota)) if quota > 0 => {
5039                    let raw = (size as f64) * 100.0 / (quota as f64);
5040                    Some(raw.min(100.0))
5041                }
5042                _ => None,
5043            };
5044            TenantListItem {
5045                id: r.tenant_id.to_string(),
5046                display_name: r.display_name.clone(),
5047                created_at_ms: r.created_at_ms,
5048                last_accessed_ms: r.last_accessed_ms,
5049                status: TenantStatusJson::from(&r.status),
5050                quota_bytes: r.quota_bytes,
5051                episode_count: cost.episode_count,
5052                size_bytes: cost.size_bytes,
5053                pct_used,
5054            }
5055        })
5056        .collect();
5057
5058    let body = Json(TenantsListResponse { tenants });
5059    if cap_reached {
5060        let mut resp = body.into_response();
5061        resp.headers_mut().insert(
5062            axum::http::HeaderName::from_static(X_SOLO_TENANTS_COUNT_CAP_HEADER),
5063            axum::http::HeaderValue::from_static("true"),
5064        );
5065        Ok(resp)
5066    } else {
5067        Ok(body.into_response())
5068    }
5069}
5070
5071/// Pure function: apply the three-case principal-driven visibility
5072/// rule to a list of `TenantRecord`s. Extracted from the handler so
5073/// unit tests can exercise the rule without driving an axum router.
5074///
5075///   * `principal == None` ⇒ all records returned (no-auth path).
5076///   * Bearer-shaped principal (`subject == "bearer" && claims.is_null()`)
5077///     ⇒ all records returned (single-principal daemon).
5078///   * Any other principal (OIDC) ⇒ filter to records whose
5079///     `tenant_id == principal.tenant_claim`. An OIDC principal with
5080///     no `tenant_claim` (theoretically unreachable — the middleware
5081///     short-circuits at 403 before us, but we defend) returns an
5082///     empty list.
5083fn filter_tenants_for_principal(
5084    records: Vec<solo_storage::TenantRecord>,
5085    principal: Option<&AuthenticatedPrincipal>,
5086) -> Vec<solo_storage::TenantRecord> {
5087    let Some(p) = principal else {
5088        // Case 1: no auth configured — return all tenants. Same scope
5089        // as `solo tenants list`.
5090        return records;
5091    };
5092    if is_single_principal_bearer(p) {
5093        // Case 2: bearer principal — return all tenants. The single
5094        // bearer holder is functionally the daemon operator.
5095        return records;
5096    }
5097    // Case 3: OIDC principal — filter to the claimed tenant only. An
5098    // unmatched claim falls through to an empty list, NOT 404, to
5099    // avoid leaking tenant existence.
5100    let Some(claim) = p.tenant_claim.as_ref() else {
5101        return Vec::new();
5102    };
5103    records
5104        .into_iter()
5105        .filter(|r| r.tenant_id == *claim)
5106        .collect()
5107}
5108
5109/// True iff `principal` looks like a bearer-mode principal — the shape
5110/// emitted by [`AuthenticatedPrincipal::bearer`]: subject is literally
5111/// `"bearer"`, claims is `serde_json::Value::Null`, and scopes is
5112/// empty. OIDC principals carry a JWT object in `claims` and the JWT
5113/// `sub` in `subject`, so they fail this predicate.
5114///
5115/// Split out so the unit tests can assert the discriminator
5116/// independent of the rest of the handler. Keeping the predicate in
5117/// one place also makes future expansion easier — e.g., a v0.11
5118/// "admin scope" might add an OIDC variant that passes this gate by
5119/// looking for a `"solo:admin"` entry in `scopes`.
5120fn is_single_principal_bearer(principal: &AuthenticatedPrincipal) -> bool {
5121    principal.subject == "bearer"
5122        && principal.claims.is_null()
5123        && principal.scopes.is_empty()
5124}
5125
5126// ---------------------------------------------------------------------------
5127// v0.10.2 — MCP-over-HTTP transport on /mcp
5128// ---------------------------------------------------------------------------
5129
/// SSE event name for the first (and, in v0.10.2, only) event emitted
/// on the `GET /mcp` stream when a client connects.
///
/// Browser-based MCP clients (e.g. the AI SDK's
/// `experimental_createMCPClient` with the SSE transport) use it to
/// confirm the stream is live before they begin polling for
/// server-initiated notifications. v0.10.2 keeps the stream idle after
/// this event — server-initiated notifications come in v0.10.3+.
pub const MCP_STREAM_EVENT_INIT: &str = "init";
5137
5138/// `POST /mcp` — JSON-RPC request/response.
5139///
5140/// v0.10.2 P2 entry point. Per the MCP Streamable HTTP transport spec,
5141/// the body is one JSON-RPC 2.0 envelope (`{jsonrpc, id, method,
5142/// params}`). The response is one JSON-RPC envelope (`{jsonrpc, id,
5143/// result}` or `{jsonrpc, id, error}`) with `Content-Type:
5144/// application/json`. **Status 200** for valid JSON-RPC (in-body
5145/// errors); **status 400** for malformed JSON; **status 401** when
5146/// auth is configured and the bearer check fails (handled by the
5147/// `auth_middleware` ahead of this handler).
5148///
5149/// Tenant resolution diverges from `solo mcp-stdio` here: stdio binds
5150/// one tenant at process start via `--tenant`. HTTP resolves the tenant
5151/// per request from the `X-Solo-Tenant` header (or
5152/// `AuthenticatedPrincipal.tenant_claim` in OIDC mode), so a single
5153/// daemon process can answer MCP calls for any tenant the registry
5154/// knows. The audit principal is `Some("bearer")` for bearer-
5155/// authenticated calls and the JWT `sub` for OIDC; `None` for
5156/// unauthenticated loopback. Documented in v0.10.2 dev log.
5157async fn mcp_http_post_handler(
5158    TenantExtractor(tenant): TenantExtractor,
5159    State(state): State<SoloHttpState>,
5160    AuditPrincipal(principal): AuditPrincipal,
5161    body: axum::body::Bytes,
5162) -> Response {
5163    // Parse the JSON-RPC envelope. Malformed input ⇒ 400 (the spec
5164    // calls out 4xx for malformed wire input even though JSON-RPC's own
5165    // parse-error code is in-body — operator-facing tooling needs the
5166    // HTTP status to distinguish "the server rejected the request
5167    // shape" from "the method returned an error").
5168    let request: crate::mcp_dispatch::JsonRpcRequest = match serde_json::from_slice(&body) {
5169        Ok(r) => r,
5170        Err(e) => {
5171            return (
5172                StatusCode::BAD_REQUEST,
5173                Json(serde_json::json!({
5174                    "error": format!("invalid JSON-RPC request: {e}"),
5175                    "status": 400,
5176                })),
5177            )
5178                .into_response();
5179        }
5180    };
5181    if request.jsonrpc != "2.0" {
5182        return (
5183            StatusCode::BAD_REQUEST,
5184            Json(serde_json::json!({
5185                "error": format!(
5186                    "invalid JSON-RPC request: expected jsonrpc=\"2.0\", got {:?}",
5187                    request.jsonrpc
5188                ),
5189                "status": 400,
5190            })),
5191        )
5192            .into_response();
5193    }
5194
5195    // Build the dispatcher with the resolved tenant + audit principal.
5196    let dispatcher = crate::mcp_dispatch::McpDispatcher::new(
5197        state.registry.clone(),
5198        tenant,
5199        (*state.user_aliases).clone(),
5200        principal,
5201    );
5202
5203    match dispatcher.dispatch(request).await {
5204        Some(response) => {
5205            // JSON-RPC errors are in-body; the HTTP status is 200 for
5206            // any valid JSON-RPC request, including ones that return an
5207            // error envelope. The client distinguishes success from
5208            // error by the presence of `result` vs `error` in the body.
5209            (StatusCode::OK, Json(response)).into_response()
5210        }
5211        None => {
5212            // Notification: per JSON-RPC 2.0 §4.1 the server MUST NOT
5213            // respond. The MCP Streamable HTTP transport spec uses
5214            // 202 Accepted for this shape so client-side polling does
5215            // not block on a body.
5216            StatusCode::ACCEPTED.into_response()
5217        }
5218    }
5219}
5220
5221/// `GET /mcp` — SSE init stream.
5222///
5223/// Per the MCP Streamable HTTP transport spec, the GET endpoint is an
5224/// SSE stream the server can use to push server-initiated messages
5225/// (notifications, progress events, sampling requests). v0.10.2 keeps
5226/// this minimal: emit one `event: init` on connect, then idle. Real
5227/// server-initiated traffic ships in v0.10.3+ when sessions land.
5228///
5229/// The stream lives behind `TenantExtractor` so the connection still
5230/// binds a tenant (browser clients open this once per tab) — future
5231/// session affinity will tie session ids to the tenant resolved here.
5232async fn mcp_http_get_handler(
5233    TenantExtractor(tenant): TenantExtractor,
5234) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
5235    let tenant_id = tenant.tenant_id().to_string();
5236    let stream = build_mcp_init_stream(tenant_id);
5237    // No keep-alive comment lines — clients distinguish the typed
5238    // `init` event from any future typed `heartbeat` event the way
5239    // `/v1/graph/stream` already does. Matching the longer interval
5240    // here (3600s) means the stream parks indefinitely after the init
5241    // event in v0.10.2; v0.10.3+ wires real notifications.
5242    Sse::new(stream).keep_alive(KeepAlive::new().interval(Duration::from_secs(3600)))
5243}
5244
5245/// Build the v0.10.2 `/mcp` GET stream: one `event: init` then idle.
5246fn build_mcp_init_stream(
5247    tenant_id: String,
5248) -> impl Stream<Item = Result<Event, Infallible>> {
5249    futures::stream::unfold(Some(tenant_id), move |state| async move {
5250        let Some(tenant_id) = state else {
5251            // Park the stream forever once init has been emitted.
5252            // Future versions will replace this with a real
5253            // notification-receiver `select!` loop.
5254            std::future::pending::<()>().await;
5255            return None;
5256        };
5257        let init_payload = serde_json::json!({
5258            "connected": true,
5259            "tenant_id": tenant_id,
5260            "ts_ms": chrono::Utc::now().timestamp_millis(),
5261        });
5262        let ev = Event::default()
5263            .event(MCP_STREAM_EVENT_INIT)
5264            .json_data(init_payload)
5265            .unwrap_or_else(|_| Event::default().event(MCP_STREAM_EVENT_INIT));
5266        Some((Ok::<Event, Infallible>(ev), None))
5267    })
5268}
5269
5270// ---------------------------------------------------------------------------
5271// Error mapping
5272// ---------------------------------------------------------------------------
5273
/// Error value for the HTTP layer: an HTTP status paired with a
/// message. Its `IntoResponse` impl renders it as a JSON body of the
/// form `{ "error": <message>, "status": <code> }` sent with that
/// status.
#[derive(Debug)]
pub struct ApiError {
    /// HTTP status code the response is sent with.
    status: StatusCode,
    /// Text placed in the `error` field of the JSON body.
    message: String,
}
5279
5280impl ApiError {
5281    fn bad_request(msg: impl Into<String>) -> Self {
5282        Self {
5283            status: StatusCode::BAD_REQUEST,
5284            message: msg.into(),
5285        }
5286    }
5287    fn not_found(msg: impl Into<String>) -> Self {
5288        Self {
5289            status: StatusCode::NOT_FOUND,
5290            message: msg.into(),
5291        }
5292    }
5293    fn internal(msg: impl Into<String>) -> Self {
5294        Self {
5295            status: StatusCode::INTERNAL_SERVER_ERROR,
5296            message: msg.into(),
5297        }
5298    }
5299}
5300
5301impl From<solo_core::Error> for ApiError {
5302    fn from(e: solo_core::Error) -> Self {
5303        use solo_core::Error;
5304        match e {
5305            Error::NotFound(msg) => ApiError::not_found(msg),
5306            Error::InvalidInput(msg) => ApiError::bad_request(msg),
5307            Error::Conflict(msg) => Self {
5308                status: StatusCode::CONFLICT,
5309                message: msg,
5310            },
5311            other => ApiError::internal(other.to_string()),
5312        }
5313    }
5314}
5315
5316impl IntoResponse for ApiError {
5317    fn into_response(self) -> Response {
5318        let body = serde_json::json!({
5319            "error": self.message,
5320            "status": self.status.as_u16(),
5321        });
5322        (self.status, Json(body)).into_response()
5323    }
5324}
5325
5326// SQL helper for recall used to live here; consolidated into
5327// solo_query::recall.
5328
5329#[cfg(test)]
5330mod handler_tests {
5331    //! In-process integration tests for the HTTP handler surface. We
5332    //! drive the axum Router directly via `tower::ServiceExt::oneshot`
5333    //! — no real TCP listener needed. Same `Harness`-shape as the MCP
5334    //! tests: real WriterActor + ReaderPool + StubEmbedder + StubVectorIndex.
5335    //!
5336    //! Tests live inline in this module rather than in a `tests/` dir
5337    //! because external integration-test exes triggered Windows UAC
5338    //! ERROR_ELEVATION_REQUIRED on the dev machine.
5339    use super::*;
5340    use axum::body::Body;
5341    use axum::http::{Request, StatusCode};
5342    use http_body_util::BodyExt;
5343    use serde_json::{Value, json};
5344    use solo_storage::test_support::StubVectorIndex;
5345    use solo_storage::{
5346        EmbedderConfig, IdentityConfig, KeyMaterial, ReaderPool, SoloConfig,
5347        StubEmbedder, TenantHandle, TenantRegistry, WriterActor, WriterSpawn,
5348    };
5349    use solo_core::VectorIndex;
5350    use std::sync::Arc as StdArc;
5351    use tower::ServiceExt;
5352
5353    fn fake_config(dim: u32) -> SoloConfig {
5354        SoloConfig {
5355            schema_version: 1,
5356            salt_hex: "00000000000000000000000000000000".to_string(),
5357            embedder: EmbedderConfig {
5358                name: "stub".to_string(),
5359                version: "v1".to_string(),
5360                dim,
5361                dtype: "f32".to_string(),
5362            },
5363            identity: IdentityConfig::default(),
5364            documents: solo_storage::DocumentConfig::default(),
5365            auth: None,
5366            audit: solo_storage::AuditSettings::default(),
5367            redaction: solo_storage::RedactionConfig::default(),
5368            llm: None,
5369            triples: solo_storage::TriplesConfig::default(),
5370            sampling: solo_storage::SamplingConfig::default(),
5371        }
5372    }
5373
    /// In-process test fixture: an axum `Router` wired to a
    /// single-tenant registry backed by a temp-dir database, plus the
    /// handles `shutdown` needs to tear everything down.
    struct Harness {
        /// Router under test; tests clone it and drive it via `oneshot`.
        router: axum::Router,
        /// Temp directory that holds the test database; kept so the
        /// directory lives as long as the harness.
        _tmp: tempfile::TempDir,
        /// Path of the database file inside `_tmp` (`test.db`).
        db_path: std::path::PathBuf,
        /// The harness's own clone of the writer's `WriteHandle`;
        /// taken and dropped in `shutdown`.
        write_handle_extra: Option<solo_storage::WriteHandle>,
        /// Join handle of the writer thread; taken and joined (with a
        /// timeout) in `shutdown`.
        join: Option<std::thread::JoinHandle<()>>,
        /// v0.10.0: handle to the per-tenant TenantHandle so SSE-flavoured
        /// tests can call `harness.invalidate_sender().send(...)` to
        /// simulate writer-actor invalidations (or grab a Receiver via
        /// `.subscribe()` for subscriber-count assertions).
        tenant_handle: StdArc<TenantHandle>,
        /// v0.10.0: clone of the registry Arc so `/v1/tenants` tests can
        /// seed additional tenant rows into the in-memory tenants_index
        /// stub via `registry.with_index(|idx| idx.register(...))`.
        registry: StdArc<TenantRegistry>,
    }
5390
5391    impl Harness {
5392        /// v0.10.0: clone the per-tenant broadcast Sender so tests can
5393        /// fire `InvalidateEvent`s directly without going through the
5394        /// writer-actor. The harness's writer is spawned via
5395        /// `WriterActor::spawn_full` (legacy variant, no invalidate
5396        /// plumb) so writer-driven events won't reach SSE subscribers
5397        /// in tests — tests use this Sender to simulate them.
5398        fn invalidate_sender(&self) -> tokio::sync::broadcast::Sender<InvalidateEvent> {
5399            self.tenant_handle.invalidate_sender().clone()
5400        }
5401    }
5402
5403    impl Harness {
5404        fn new(runtime: &tokio::runtime::Runtime) -> Self {
5405            Self::new_with_auth(runtime, None)
5406        }
5407
5408        /// Open a fresh side connection against the harness's DB. Used
5409        /// by graph_expand tests to seed clusters / triples / documents
5410        /// directly (the writer-actor doesn't expose those write paths).
5411        fn open_db(&self) -> rusqlite::Connection {
5412            solo_storage::test_support::open_test_db_at(&self.db_path)
5413        }
5414
5415        fn new_with_auth(
5416            runtime: &tokio::runtime::Runtime,
5417            bearer_token: Option<String>,
5418        ) -> Self {
5419            Self::new_with_auth_config(
5420                runtime,
5421                bearer_token.map(|token| crate::auth::AuthConfig::Bearer { token }),
5422            )
5423        }
5424
5425        fn new_with_auth_config(
5426            runtime: &tokio::runtime::Runtime,
5427            auth: Option<crate::auth::AuthConfig>,
5428        ) -> Self {
5429            use solo_storage::embedder_registry::{EmbedderIdentity, get_or_insert_embedder_id};
5430
5431            let tmp = tempfile::TempDir::new().unwrap();
5432            let dim = 16usize;
5433            let hnsw: StdArc<dyn VectorIndex + Send + Sync> = StdArc::new(StubVectorIndex::new(dim));
5434            let embedder: StdArc<dyn solo_core::Embedder> =
5435                StdArc::new(StubEmbedder::new("stub", "v1", dim));
5436            let path = tmp.path().join("test.db");
5437
5438            let embedder_id = {
5439                let conn = solo_storage::test_support::open_test_db_at(&path);
5440                get_or_insert_embedder_id(
5441                    &conn,
5442                    &EmbedderIdentity {
5443                        name: "stub".into(),
5444                        version: "v1".into(),
5445                        dim: dim as u32,
5446                        dtype: "f32".into(),
5447                    },
5448                )
5449                .unwrap()
5450            };
5451
5452            let conn = solo_storage::test_support::open_test_db_at(&path);
5453            let WriterSpawn { handle, join } = WriterActor::spawn_full(
5454                conn,
5455                hnsw.clone(),
5456                tmp.path().to_path_buf(),
5457                embedder_id,
5458            );
5459            let pool: ReaderPool =
5460                runtime.block_on(async { ReaderPool::new(&path, None, hnsw.clone()).unwrap() });
5461
5462            // Build a TenantHandle from the assembled parts and wrap it
5463            // in a single-tenant test registry.
5464            let tenant_id = solo_core::TenantId::default_tenant();
5465            let tenant_handle = StdArc::new(
5466                TenantHandle::from_parts_for_tests(
5467                    tenant_id.clone(),
5468                    fake_config(dim as u32),
5469                    path.clone(),
5470                    tmp.path().to_path_buf(),
5471                    embedder_id,
5472                    hnsw,
5473                    embedder.clone(),
5474                    handle.clone(),
5475                    // The harness owns ANOTHER WriteHandle clone + the join.
5476                    // We give the TenantHandle a dummy join that immediately
5477                    // returns — it never gets joined because shutdown_all
5478                    // can't get exclusive Arc ownership when the harness
5479                    // also holds a writer clone.
5480                    std::thread::spawn(|| {}),
5481                    pool,
5482                ),
5483            );
5484            let tenant_handle_clone = tenant_handle.clone();
5485
5486            // Suppress the auto-spawned dummy thread by letting it finish.
5487            // We DON'T put the real `join` into the TenantHandle because
5488            // we keep our own clone of `handle` for the shutdown path.
5489            let key = KeyMaterial::from_bytes_for_tests([0u8; 32]);
5490            let registry = StdArc::new(TenantRegistry::for_tests_with_single_tenant(
5491                tmp.path().to_path_buf(),
5492                key,
5493                embedder,
5494                tenant_handle,
5495            ));
5496            let registry_clone = registry.clone();
5497
5498            let state = SoloHttpState {
5499                registry,
5500                default_tenant: tenant_id,
5501                user_aliases: Arc::new(Vec::new()),
5502            };
5503            let router = router_with_auth_config(state, auth);
5504            Harness {
5505                router,
5506                _tmp: tmp,
5507                db_path: path,
5508                write_handle_extra: Some(handle),
5509                join: Some(join),
5510                tenant_handle: tenant_handle_clone,
5511                registry: registry_clone,
5512            }
5513        }
5514
5515        fn shutdown(mut self, runtime: &tokio::runtime::Runtime) {
5516            let join = self.join.take();
5517            let extra = self.write_handle_extra.take();
5518            // v0.10.0: the new `tenant_handle` Harness field holds another
5519            // `Arc<TenantHandle>` that owns its own WriteHandle clone.
5520            // We must drop our reference here so the inner WriteHandle
5521            // can be released when the registry drops below. Without
5522            // this, the writer thread's mpsc never closes and the join
5523            // times out at 5s.
5524            let tenant_handle = self.tenant_handle;
5525            // v0.10.0: same story for the new `registry` Arc clone the
5526            // tenants-list tests use to seed extra index rows — the
5527            // state inside the router holds one Arc, this is the
5528            // other; both must drop before the underlying registry
5529            // dies and releases its index-mutex / cached handles.
5530            let registry = self.registry;
5531            runtime.block_on(async move {
5532                drop(extra);
5533                drop(tenant_handle); // drop Harness's direct tenant Arc
5534                drop(registry); // drop Harness's direct registry Arc
5535                drop(self.router); // drops state → drops pool inside runtime ctx
5536                drop(self._tmp);
5537                if let Some(join) = join {
5538                    let (tx, rx) = std::sync::mpsc::channel();
5539                    std::thread::spawn(move || {
5540                        let _ = tx.send(join.join());
5541                    });
5542                    tokio::task::spawn_blocking(move || {
5543                        rx.recv_timeout(std::time::Duration::from_secs(5))
5544                    })
5545                    .await
5546                    .expect("blocking task")
5547                    .expect("writer thread did not exit within 5s")
5548                    .expect("writer thread panicked");
5549                }
5550            });
5551        }
5552    }
5553
5554    fn rt() -> tokio::runtime::Runtime {
5555        tokio::runtime::Builder::new_multi_thread()
5556            .worker_threads(2)
5557            .enable_all()
5558            .build()
5559            .unwrap()
5560    }
5561
5562    /// Issue one HTTP request through the router and capture status +
5563    /// JSON body. `body` may be `None` for GET/DELETE; `auth` adds an
5564    /// `Authorization` header value verbatim (e.g. `"Bearer xyz"`).
5565    async fn call(
5566        router: axum::Router,
5567        method: &str,
5568        uri: &str,
5569        body: Option<Value>,
5570    ) -> (StatusCode, Value) {
5571        call_with_auth(router, method, uri, body, None).await
5572    }
5573
5574    async fn call_with_auth(
5575        router: axum::Router,
5576        method: &str,
5577        uri: &str,
5578        body: Option<Value>,
5579        auth: Option<&str>,
5580    ) -> (StatusCode, Value) {
5581        let mut req_builder = Request::builder()
5582            .method(method)
5583            .uri(uri)
5584            .header("content-type", "application/json");
5585        if let Some(a) = auth {
5586            req_builder = req_builder.header("authorization", a);
5587        }
5588        let req = if let Some(b) = body {
5589            let bytes = serde_json::to_vec(&b).unwrap();
5590            req_builder.body(Body::from(bytes)).unwrap()
5591        } else {
5592            req_builder = req_builder.header("content-length", "0");
5593            req_builder.body(Body::empty()).unwrap()
5594        };
5595        let resp = router.oneshot(req).await.expect("oneshot");
5596        let status = resp.status();
5597        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
5598        let v: Value = if body_bytes.is_empty() {
5599            Value::Null
5600        } else {
5601            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
5602        };
5603        (status, v)
5604    }
5605
5606    #[test]
5607    fn health_returns_ok() {
5608        let runtime = rt();
5609        let h = Harness::new(&runtime);
5610        let r = h.router.clone();
5611        let (status, _body) = runtime.block_on(call(r, "GET", "/health", None));
5612        assert_eq!(status, StatusCode::OK);
5613        h.shutdown(&runtime);
5614    }
5615
5616    /// `GET /openapi.json` returns a parseable OpenAPI 3.x document with
5617    /// the four `memory.*` endpoints + their request/response schemas.
5618    /// Acts as a drift detector: if a future commit adds/removes a route
5619    /// without updating `openapi_spec`, this test fails loudly.
5620    #[test]
5621    fn openapi_json_describes_all_endpoints() {
5622        let runtime = rt();
5623        let h = Harness::new(&runtime);
5624        let r = h.router.clone();
5625        let (status, spec) = runtime.block_on(call(r, "GET", "/openapi.json", None));
5626        assert_eq!(status, StatusCode::OK);
5627        assert!(spec.is_object(), "openapi.json must be a JSON object");
5628
5629        // Top-level shape per OpenAPI 3.1.
5630        assert!(
5631            spec.get("openapi")
5632                .and_then(|v| v.as_str())
5633                .is_some_and(|s| s.starts_with("3.")),
5634            "missing or wrong openapi version: {spec}"
5635        );
5636        assert!(spec.pointer("/info/title").is_some());
5637        assert!(spec.pointer("/info/version").is_some());
5638
5639        // Every route the router serves must be documented.
5640        let paths = spec
5641            .get("paths")
5642            .and_then(|v| v.as_object())
5643            .expect("paths must be an object");
5644        for expected in [
5645            "/health",
5646            "/openapi.json",
5647            "/memory",
5648            "/memory/search",
5649            "/memory/consolidate",
5650            "/memory/{id}",
5651            // Path 1 derived-layer endpoints (v0.4.0+):
5652            "/memory/themes",
5653            "/memory/facts_about",
5654            "/memory/contradictions",
5655            // v0.5.0 Priority 3:
5656            "/memory/clusters/{cluster_id}",
5657            // v0.7.0 P6 — document operations:
5658            "/memory/documents",
5659            "/memory/documents/search",
5660            "/memory/documents/{id}",
5661        ] {
5662            assert!(
5663                paths.contains_key(expected),
5664                "openapi paths missing {expected}: {paths:?}"
5665            );
5666        }
5667
5668        // Method coverage on /memory/documents: must document both POST
5669        // (ingest) and GET (list).
5670        let docs = paths.get("/memory/documents").expect("/memory/documents");
5671        assert!(docs.get("post").is_some(), "POST /memory/documents undocumented");
5672        assert!(docs.get("get").is_some(), "GET /memory/documents undocumented");
5673
5674        // Method coverage on /memory/documents/{id}: must document both
5675        // GET (inspect) and DELETE (forget).
5676        let docid = paths
5677            .get("/memory/documents/{id}")
5678            .expect("/memory/documents/{id}");
5679        assert!(
5680            docid.get("get").is_some(),
5681            "GET /memory/documents/{{id}} undocumented"
5682        );
5683        assert!(
5684            docid.get("delete").is_some(),
5685            "DELETE /memory/documents/{{id}} undocumented"
5686        );
5687
5688        // Method coverage on /memory/{id}: must document both GET (inspect)
5689        // and DELETE (forget).
5690        let memid = paths.get("/memory/{id}").expect("memory/{id}");
5691        assert!(memid.get("get").is_some(), "GET /memory/{{id}} undocumented");
5692        assert!(
5693            memid.get("delete").is_some(),
5694            "DELETE /memory/{{id}} undocumented"
5695        );
5696
5697        // Component schemas referenced from paths must be defined.
5698        for schema_name in [
5699            "RememberRequest",
5700            "RememberResponse",
5701            "RecallRequest",
5702            "RecallResult",
5703            "EpisodeRecord",
5704            "ApiError",
5705            "ConsolidationScope",
5706            "ConsolidationReport",
5707            // Path 1 derived-layer schemas (v0.4.0+):
5708            "ThemeHit",
5709            "FactHit",
5710            "ContradictionHit",
5711            // v0.5.0 Priority 3:
5712            "ClusterRecord",
5713            // v0.7.0 P6 — document schemas:
5714            "IngestDocumentRequest",
5715            "IngestReport",
5716            "ForgetDocumentReport",
5717            "SearchDocsRequest",
5718            "DocSearchHit",
5719            "DocumentInspectResult",
5720            "DocumentSummary",
5721        ] {
5722            let ptr = format!("/components/schemas/{schema_name}");
5723            assert!(
5724                spec.pointer(&ptr).is_some(),
5725                "component schema {schema_name} missing"
5726            );
5727        }
5728
5729        // bearerAuth security scheme is declared (LAN deployments need it).
5730        assert!(
5731            spec.pointer("/components/securitySchemes/bearerAuth")
5732                .is_some(),
5733            "bearerAuth security scheme missing"
5734        );
5735
5736        h.shutdown(&runtime);
5737    }
5738
5739    /// `/openapi.json` must remain unauthenticated even when bearer auth
5740    /// is enabled — the spec describes the API shape, not secrets, and
5741    /// codegen tooling shouldn't need a credential to fetch it.
5742    #[test]
5743    fn openapi_json_is_exempt_from_bearer_auth() {
5744        let runtime = rt();
5745        let h = Harness::new_with_auth(&runtime, Some("super-secret".into()));
5746        let r = h.router.clone();
5747        // No Authorization header → still 200 for /openapi.json.
5748        let (status, _body) = runtime.block_on(call(r, "GET", "/openapi.json", None));
5749        assert_eq!(status, StatusCode::OK);
5750        h.shutdown(&runtime);
5751    }
5752
5753    #[test]
5754    fn remember_returns_memory_id() {
5755        let runtime = rt();
5756        let h = Harness::new(&runtime);
5757        let r = h.router.clone();
5758        let (status, body) = runtime.block_on(call(
5759            r,
5760            "POST",
5761            "/memory",
5762            Some(json!({ "content": "http harness test" })),
5763        ));
5764        assert_eq!(status, StatusCode::OK);
5765        let mid = body.get("memory_id").and_then(|v| v.as_str()).unwrap();
5766        assert_eq!(mid.len(), 36, "uuid length");
5767        h.shutdown(&runtime);
5768    }
5769
5770    #[test]
5771    fn empty_content_returns_400() {
5772        let runtime = rt();
5773        let h = Harness::new(&runtime);
5774        let r = h.router.clone();
5775        let (status, body) =
5776            runtime.block_on(call(r, "POST", "/memory", Some(json!({ "content": "" }))));
5777        assert_eq!(status, StatusCode::BAD_REQUEST);
5778        assert!(
5779            body.get("error")
5780                .and_then(|e| e.as_str())
5781                .map(|s| s.contains("must not be empty"))
5782                .unwrap_or(false),
5783            "got: {body}"
5784        );
5785        h.shutdown(&runtime);
5786    }
5787
5788    #[test]
5789    fn empty_query_returns_400() {
5790        let runtime = rt();
5791        let h = Harness::new(&runtime);
5792        let r = h.router.clone();
5793        let (status, body) = runtime.block_on(call(
5794            r,
5795            "POST",
5796            "/memory/search",
5797            Some(json!({ "query": "" })),
5798        ));
5799        assert_eq!(status, StatusCode::BAD_REQUEST);
5800        assert!(
5801            body.get("error")
5802                .and_then(|e| e.as_str())
5803                .map(|s| s.contains("must not be empty"))
5804                .unwrap_or(false),
5805            "got: {body}"
5806        );
5807        h.shutdown(&runtime);
5808    }
5809
5810    #[test]
5811    fn inspect_unknown_returns_404() {
5812        let runtime = rt();
5813        let h = Harness::new(&runtime);
5814        let r = h.router.clone();
5815        let (status, body) = runtime.block_on(call(
5816            r,
5817            "GET",
5818            "/memory/00000000-0000-7000-8000-000000000000",
5819            None,
5820        ));
5821        assert_eq!(status, StatusCode::NOT_FOUND);
5822        assert!(body.get("error").is_some(), "got: {body}");
5823        h.shutdown(&runtime);
5824    }
5825
5826    #[test]
5827    fn inspect_invalid_id_returns_400() {
5828        let runtime = rt();
5829        let h = Harness::new(&runtime);
5830        let r = h.router.clone();
5831        let (status, _body) = runtime.block_on(call(r, "GET", "/memory/not-a-uuid", None));
5832        assert_eq!(status, StatusCode::BAD_REQUEST);
5833        h.shutdown(&runtime);
5834    }
5835
5836    #[test]
5837    fn forget_unknown_returns_404() {
5838        let runtime = rt();
5839        let h = Harness::new(&runtime);
5840        let r = h.router.clone();
5841        let (status, _body) = runtime.block_on(call(
5842            r,
5843            "DELETE",
5844            "/memory/00000000-0000-7000-8000-000000000000",
5845            None,
5846        ));
5847        assert_eq!(status, StatusCode::NOT_FOUND);
5848        h.shutdown(&runtime);
5849    }
5850
5851    /// `POST /memory/consolidate` runs the cluster pass and returns
5852    /// the report as JSON. With an empty body, `ConsolidationScope`
5853    /// defaults to unbounded; with a non-empty body, the
5854    /// `window_days` field is honored. The Harness's writer is
5855    /// spawned without a Steward, so `abstractions_built` stays 0
5856    /// even when `clusters_built` is nonzero — same posture as the
5857    /// daemon today.
5858    #[test]
5859    fn consolidate_endpoint_returns_report() {
5860        let runtime = rt();
5861        let h = Harness::new(&runtime);
5862        let r = h.router.clone();
5863        runtime.block_on(async move {
5864            // Empty DB → all-zero report; structural assertion only.
5865            let (status, body) = call(r.clone(), "POST", "/memory/consolidate", None).await;
5866            assert_eq!(status, StatusCode::OK);
5867            for field in [
5868                "episodes_seen",
5869                "clusters_built",
5870                "episodes_clustered",
5871                "abstractions_built",
5872                "triples_built",
5873                "contradictions_found",
5874            ] {
5875                assert!(
5876                    body.get(field).and_then(|v| v.as_u64()).is_some(),
5877                    "missing field {field}: {body}"
5878                );
5879            }
5880            assert_eq!(body["episodes_seen"], 0);
5881            assert_eq!(body["clusters_built"], 0);
5882
5883            // Non-empty body with window_days → still 200; unmistakable
5884            // shape round-trips through ConsolidationScope's serde.
5885            let (status2, _body2) = call(
5886                r,
5887                "POST",
5888                "/memory/consolidate",
5889                Some(json!({ "window_days": 7 })),
5890            )
5891            .await;
5892            assert_eq!(status2, StatusCode::OK);
5893        });
5894        h.shutdown(&runtime);
5895    }
5896
5897    #[test]
5898    fn auth_required_routes_reject_missing_token() {
5899        let runtime = rt();
5900        let h = Harness::new_with_auth(&runtime, Some("secret-xyz".into()));
5901        let r = h.router.clone();
5902        runtime.block_on(async move {
5903            // No Authorization header → 401.
5904            let (status, _body) = call(
5905                r.clone(),
5906                "POST",
5907                "/memory",
5908                Some(json!({ "content": "x" })),
5909            )
5910            .await;
5911            assert_eq!(status, StatusCode::UNAUTHORIZED);
5912
5913            // Wrong token → 401.
5914            let (status, _body) = call_with_auth(
5915                r.clone(),
5916                "POST",
5917                "/memory",
5918                Some(json!({ "content": "x" })),
5919                Some("Bearer wrong-token"),
5920            )
5921            .await;
5922            assert_eq!(status, StatusCode::UNAUTHORIZED);
5923
5924            // Correct token → handler runs (200).
5925            let (status, body) = call_with_auth(
5926                r.clone(),
5927                "POST",
5928                "/memory",
5929                Some(json!({ "content": "authed" })),
5930                Some("Bearer secret-xyz"),
5931            )
5932            .await;
5933            assert_eq!(status, StatusCode::OK);
5934            assert!(body.get("memory_id").is_some());
5935        });
5936        h.shutdown(&runtime);
5937    }
5938
5939    #[test]
5940    fn health_endpoint_does_not_require_auth() {
5941        let runtime = rt();
5942        let h = Harness::new_with_auth(&runtime, Some("secret".into()));
5943        let r = h.router.clone();
5944        let (status, _body) = runtime.block_on(call(r, "GET", "/health", None));
5945        // Liveness probes should work without credentials.
5946        assert_eq!(status, StatusCode::OK);
5947        h.shutdown(&runtime);
5948    }
5949
5950    #[test]
5951    fn auth_response_includes_www_authenticate_header() {
5952        // Verify the WWW-Authenticate hint that lets a well-behaved
5953        // client know it's a bearer-auth scheme. We check via raw
5954        // request → response (oneshot returns Response, but our
5955        // call() helper drops the headers; build the request manually).
5956        let runtime = rt();
5957        let h = Harness::new_with_auth(&runtime, Some("secret".into()));
5958        let r = h.router.clone();
5959        runtime.block_on(async move {
5960            let req = Request::builder()
5961                .method("POST")
5962                .uri("/memory")
5963                .header("content-type", "application/json")
5964                .body(Body::from(serde_json::to_vec(&json!({ "content": "x" })).unwrap()))
5965                .unwrap();
5966            let resp = r.oneshot(req).await.unwrap();
5967            assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
5968            let www = resp
5969                .headers()
5970                .get("www-authenticate")
5971                .and_then(|v| v.to_str().ok())
5972                .unwrap_or("");
5973            assert!(
5974                www.starts_with("Bearer"),
5975                "expected WWW-Authenticate: Bearer..., got: {www}"
5976            );
5977        });
5978        h.shutdown(&runtime);
5979    }
5980
5981    // ---------------------------------------------------------------------
5982    // v0.8.0 P3: OIDC end-to-end. Spin up a fake IdP (wiremock) that
5983    // serves an OIDC discovery doc + JWKS, mint a token claiming
5984    // `solo_tenant = "default"`, and verify it routes through the
5985    // middleware + TenantExtractor + handler.
5986    // ---------------------------------------------------------------------
5987
5988    fn base64_url_for_test(bytes: &[u8]) -> String {
5989        use base64::Engine;
5990        base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(bytes)
5991    }
5992
5993    /// Spin up a single-purpose fake OIDC IdP for these tests. Returns
5994    /// (mock_server, discovery_url, secret, kid).
5995    async fn spin_fake_idp() -> (wiremock::MockServer, String, Vec<u8>, &'static str) {
5996        use wiremock::matchers::{method, path};
5997        use wiremock::{Mock, MockServer, ResponseTemplate};
5998        let server = MockServer::start().await;
5999        let secret = b"http-test-secret-for-hmac-fixture".to_vec();
6000        let kid = "http-test-kid";
6001        let discovery = serde_json::json!({
6002            "issuer": server.uri(),
6003            "jwks_uri": format!("{}/jwks", server.uri()),
6004        });
6005        Mock::given(method("GET"))
6006            .and(path("/.well-known/openid-configuration"))
6007            .respond_with(ResponseTemplate::new(200).set_body_json(discovery))
6008            .mount(&server)
6009            .await;
6010        let jwks = serde_json::json!({
6011            "keys": [
6012                {
6013                    "kty": "oct",
6014                    "kid": kid,
6015                    "alg": "HS256",
6016                    "k": base64_url_for_test(&secret),
6017                }
6018            ]
6019        });
6020        Mock::given(method("GET"))
6021            .and(path("/jwks"))
6022            .respond_with(ResponseTemplate::new(200).set_body_json(jwks))
6023            .mount(&server)
6024            .await;
6025        let discovery_url = format!("{}/.well-known/openid-configuration", server.uri());
6026        (server, discovery_url, secret, kid)
6027    }
6028
6029    fn mint_idp_token(
6030        server_uri: &str,
6031        kid: &str,
6032        secret: &[u8],
6033        tenant_claim: &str,
6034        audience: &str,
6035    ) -> String {
6036        use jsonwebtoken::{Algorithm, EncodingKey, Header};
6037        let mut header = Header::new(Algorithm::HS256);
6038        header.kid = Some(kid.to_string());
6039        let now = std::time::SystemTime::now()
6040            .duration_since(std::time::UNIX_EPOCH)
6041            .unwrap()
6042            .as_secs();
6043        let claims = serde_json::json!({
6044            "iss": server_uri,
6045            "sub": "test-user-1",
6046            "aud": audience,
6047            "exp": now + 600,
6048            "iat": now,
6049            "solo_tenant": tenant_claim,
6050        });
6051        jsonwebtoken::encode(&header, &claims, &EncodingKey::from_secret(secret))
6052            .expect("mint token")
6053    }
6054
6055    #[test]
6056    fn http_oidc_accept_resolves_to_tenant_from_claim() {
6057        let runtime = rt();
6058        let (fake_server, discovery_url, secret, kid) =
6059            runtime.block_on(async { spin_fake_idp().await });
6060        let server_uri = fake_server.uri();
6061        // Keep the wiremock server alive for the duration of this test.
6062        let _server_guard = fake_server;
6063
6064        let auth = crate::auth::AuthConfig::Oidc {
6065            discovery_url,
6066            audience: "test-audience".to_string(),
6067            tenant_claim_name: "solo_tenant".to_string(),
6068        };
6069        let h = Harness::new_with_auth_config(&runtime, Some(auth));
6070        let r = h.router.clone();
6071
6072        // Mint a token claiming the harness's default tenant.
6073        let token = mint_idp_token(
6074            &server_uri,
6075            kid,
6076            &secret,
6077            "default",
6078            "test-audience",
6079        );
6080
6081        runtime.block_on(async move {
6082            // POST /memory with a valid OIDC token → handler runs, returns memory_id.
6083            let (status, body) = call_with_auth(
6084                r.clone(),
6085                "POST",
6086                "/memory",
6087                Some(json!({ "content": "oidc-routed content" })),
6088                Some(&format!("Bearer {token}")),
6089            )
6090            .await;
6091            assert_eq!(status, StatusCode::OK, "got body: {body}");
6092            assert!(body.get("memory_id").is_some(), "no memory_id in {body}");
6093        });
6094        h.shutdown(&runtime);
6095    }
6096
6097    #[test]
6098    fn http_oidc_reject_missing_token_returns_401() {
6099        let runtime = rt();
6100        let (fake_server, discovery_url, _secret, _kid) =
6101            runtime.block_on(async { spin_fake_idp().await });
6102        let _server_guard = fake_server;
6103        let auth = crate::auth::AuthConfig::Oidc {
6104            discovery_url,
6105            audience: "test-audience".to_string(),
6106            tenant_claim_name: "solo_tenant".to_string(),
6107        };
6108        let h = Harness::new_with_auth_config(&runtime, Some(auth));
6109        let r = h.router.clone();
6110        runtime.block_on(async move {
6111            // No Authorization header.
6112            let (status, _body) =
6113                call(r.clone(), "POST", "/memory", Some(json!({ "content": "x" }))).await;
6114            assert_eq!(status, StatusCode::UNAUTHORIZED);
6115
6116            // Garbage token → 401 (invalid signature / not a JWT).
6117            let (status, _body) = call_with_auth(
6118                r.clone(),
6119                "POST",
6120                "/memory",
6121                Some(json!({ "content": "x" })),
6122                Some("Bearer not-a-real-jwt"),
6123            )
6124            .await;
6125            assert_eq!(status, StatusCode::UNAUTHORIZED);
6126        });
6127        h.shutdown(&runtime);
6128    }
6129
6130    #[test]
6131    fn full_remember_recall_inspect_forget_round_trip() {
6132        let runtime = rt();
6133        let h = Harness::new(&runtime);
6134        let r = h.router.clone();
6135        runtime.block_on(async move {
6136            // POST /memory
6137            let (status, body) = call(
6138                r.clone(),
6139                "POST",
6140                "/memory",
6141                Some(json!({ "content": "round-trip content" })),
6142            )
6143            .await;
6144            assert_eq!(status, StatusCode::OK);
6145            let mid = body
6146                .get("memory_id")
6147                .and_then(|v| v.as_str())
6148                .unwrap()
6149                .to_string();
6150
6151            // POST /memory/search — exact-match (StubEmbedder) returns the row.
6152            let (status, body) = call(
6153                r.clone(),
6154                "POST",
6155                "/memory/search",
6156                Some(json!({ "query": "round-trip content", "limit": 5 })),
6157            )
6158            .await;
6159            assert_eq!(status, StatusCode::OK);
6160            let hits = body.get("hits").and_then(|v| v.as_array()).unwrap();
6161            assert!(
6162                hits.iter()
6163                    .any(|h| h.get("content").and_then(|c| c.as_str())
6164                        == Some("round-trip content")),
6165                "expected hit with content; got: {body}"
6166            );
6167
6168            // GET /memory/{id}
6169            let (status, body) = call(r.clone(), "GET", &format!("/memory/{mid}"), None).await;
6170            assert_eq!(status, StatusCode::OK);
6171            assert_eq!(body.get("status").and_then(|v| v.as_str()), Some("active"));
6172
6173            // DELETE /memory/{id}
6174            let (status, _body) =
6175                call(r.clone(), "DELETE", &format!("/memory/{mid}"), None).await;
6176            assert_eq!(status, StatusCode::NO_CONTENT);
6177
6178            // GET again — still readable but status='forgotten'
6179            let (status, body) = call(r.clone(), "GET", &format!("/memory/{mid}"), None).await;
6180            assert_eq!(status, StatusCode::OK);
6181            assert_eq!(
6182                body.get("status").and_then(|v| v.as_str()),
6183                Some("forgotten")
6184            );
6185
6186            // POST /memory/search — forgotten row excluded.
6187            let (status, body) = call(
6188                r.clone(),
6189                "POST",
6190                "/memory/search",
6191                Some(json!({ "query": "round-trip content", "limit": 5 })),
6192            )
6193            .await;
6194            assert_eq!(status, StatusCode::OK);
6195            let hits = body.get("hits").and_then(|v| v.as_array()).unwrap();
6196            assert!(
6197                hits.iter().all(|h| h.get("memory_id").and_then(|m| m.as_str())
6198                    != Some(mid.as_str())),
6199                "forgotten row should be excluded from recall: {body}"
6200            );
6201        });
6202        h.shutdown(&runtime);
6203    }
6204
6205    // Path 1 derived-layer endpoint tests (v0.4.0+). Wire-path only —
6206    // the actual content correctness is covered by solo-query::derived's
6207    // own tests (Sub-task A). These verify the HTTP shape: GET routing,
6208    // Query-string param parsing, JSON-array response body, validation
6209    // 400s for invalid inputs.
6210
6211    #[test]
6212    fn themes_endpoint_returns_empty_array_on_empty_db() {
6213        let runtime = rt();
6214        let h = Harness::new(&runtime);
6215        let r = h.router.clone();
6216        let (status, body) =
6217            runtime.block_on(call(r, "GET", "/memory/themes", None));
6218        assert_eq!(status, StatusCode::OK);
6219        assert!(body.is_array(), "expected array, got {body}");
6220        assert_eq!(body.as_array().unwrap().len(), 0);
6221        h.shutdown(&runtime);
6222    }
6223
6224    #[test]
6225    fn themes_endpoint_passes_through_query_params() {
6226        let runtime = rt();
6227        let h = Harness::new(&runtime);
6228        let r = h.router.clone();
6229        let (status, body) = runtime.block_on(call(
6230            r,
6231            "GET",
6232            "/memory/themes?window_days=7&limit=20",
6233            None,
6234        ));
6235        assert_eq!(status, StatusCode::OK);
6236        assert!(body.is_array(), "expected array, got {body}");
6237        h.shutdown(&runtime);
6238    }
6239
6240    #[test]
6241    fn facts_about_endpoint_requires_subject() {
6242        let runtime = rt();
6243        let h = Harness::new(&runtime);
6244        let r = h.router.clone();
6245        // Missing subject — axum's Query extractor 422 (Unprocessable
6246        // Entity) on missing required field; some axum versions
6247        // surface as 400. Accept either.
6248        let (status, _body) =
6249            runtime.block_on(call(r, "GET", "/memory/facts_about", None));
6250        assert!(
6251            status == StatusCode::BAD_REQUEST
6252                || status == StatusCode::UNPROCESSABLE_ENTITY,
6253            "expected 400 or 422 for missing subject, got {status}"
6254        );
6255        h.shutdown(&runtime);
6256    }
6257
6258    #[test]
6259    fn facts_about_endpoint_rejects_blank_subject() {
6260        let runtime = rt();
6261        let h = Harness::new(&runtime);
6262        let r = h.router.clone();
6263        // Whitespace-only subject reaches the handler then trips its
6264        // own validation → ApiError::bad_request → 400.
6265        let (status, body) = runtime.block_on(call(
6266            r,
6267            "GET",
6268            "/memory/facts_about?subject=%20%20",
6269            None,
6270        ));
6271        assert_eq!(status, StatusCode::BAD_REQUEST);
6272        assert!(
6273            body.get("error")
6274                .and_then(|v| v.as_str())
6275                .is_some_and(|s| s.contains("subject")),
6276            "expected error mentioning subject, got {body}"
6277        );
6278        h.shutdown(&runtime);
6279    }
6280
6281    #[test]
6282    fn facts_about_endpoint_returns_empty_array_for_unknown_subject() {
6283        let runtime = rt();
6284        let h = Harness::new(&runtime);
6285        let r = h.router.clone();
6286        let (status, body) = runtime.block_on(call(
6287            r,
6288            "GET",
6289            "/memory/facts_about?subject=NobodyKnows",
6290            None,
6291        ));
6292        assert_eq!(status, StatusCode::OK);
6293        assert_eq!(body.as_array().unwrap().len(), 0);
6294        h.shutdown(&runtime);
6295    }
6296
6297    #[test]
6298    fn facts_about_endpoint_parses_include_as_object_query_param() {
6299        // v0.5.1 P8: `?include_as_object=true` must parse cleanly
6300        // through the `Query<FactsAboutQuery>` extractor. If the
6301        // struct field is missing or wrongly typed, axum returns
6302        // 400/422 before reaching the handler. We don't seed
6303        // triples; we only need the request to reach the handler
6304        // and produce a normal 200 + empty array. Mirrors
6305        // `inspect_cluster_endpoint_passes_full_content_query_param`.
6306        let runtime = rt();
6307        let h = Harness::new(&runtime);
6308        let r = h.router.clone();
6309        let (status, body) = runtime.block_on(call(
6310            r,
6311            "GET",
6312            "/memory/facts_about?subject=Maya&include_as_object=true",
6313            None,
6314        ));
6315        assert_eq!(
6316            status,
6317            StatusCode::OK,
6318            "expected 200 with include_as_object query param, got {status}"
6319        );
6320        assert!(body.is_array());
6321        h.shutdown(&runtime);
6322    }
6323
6324    #[test]
6325    fn inspect_cluster_endpoint_unknown_id_returns_404() {
6326        // Maps `Error::NotFound` from `solo_query::inspect_cluster`
6327        // through `ApiError::from` → 404. Mirrors the unknown-memory
6328        // case for `GET /memory/{id}`.
6329        let runtime = rt();
6330        let h = Harness::new(&runtime);
6331        let r = h.router.clone();
6332        let (status, body) = runtime.block_on(call(
6333            r,
6334            "GET",
6335            "/memory/clusters/no-such-cluster",
6336            None,
6337        ));
6338        assert_eq!(status, StatusCode::NOT_FOUND);
6339        assert!(
6340            body.get("error")
6341                .and_then(|v| v.as_str())
6342                .is_some_and(|s| s.contains("no-such-cluster")),
6343            "expected error mentioning cluster id, got {body}"
6344        );
6345        h.shutdown(&runtime);
6346    }
6347
6348    #[test]
6349    fn inspect_cluster_endpoint_passes_full_content_query_param() {
6350        // Even with no matching cluster (→ 404), the request must
6351        // reach the handler — proves the `?full_content=true` query
6352        // string parses cleanly (Query<InspectClusterQuery>::default
6353        // path didn't choke). If we accidentally fail at the extractor
6354        // we'd get a 400/422, not the expected 404.
6355        let runtime = rt();
6356        let h = Harness::new(&runtime);
6357        let r = h.router.clone();
6358        let (status, _body) = runtime.block_on(call(
6359            r,
6360            "GET",
6361            "/memory/clusters/missing?full_content=true",
6362            None,
6363        ));
6364        assert_eq!(status, StatusCode::NOT_FOUND);
6365        h.shutdown(&runtime);
6366    }
6367
6368    #[test]
6369    fn contradictions_endpoint_returns_empty_array_on_empty_db() {
6370        let runtime = rt();
6371        let h = Harness::new(&runtime);
6372        let r = h.router.clone();
6373        let (status, body) = runtime.block_on(call(
6374            r,
6375            "GET",
6376            "/memory/contradictions",
6377            None,
6378        ));
6379        assert_eq!(status, StatusCode::OK);
6380        assert!(body.is_array());
6381        assert_eq!(body.as_array().unwrap().len(), 0);
6382        h.shutdown(&runtime);
6383    }
6384
6385    #[test]
6386    fn derived_endpoints_require_bearer_when_auth_enabled() {
6387        let runtime = rt();
6388        let h = Harness::new_with_auth(&runtime, Some("secret-token".to_string()));
6389        // Each of the three new endpoints should reject missing token.
6390        // Per the existing tests' shutdown-timing comment: don't hold a
6391        // long-lived router clone across multiple iterations — drop the
6392        // clone before each subsequent oneshot, and don't keep a `let r =
6393        // h.router.clone()` alive across h.shutdown(). Re-clone per
6394        // iteration; the per-call clone is consumed by oneshot.
6395        for path in [
6396            "/memory/themes",
6397            "/memory/facts_about?subject=Sam",
6398            "/memory/contradictions",
6399            "/memory/clusters/any-id",
6400        ] {
6401            let (status, _) = runtime.block_on(call(h.router.clone(), "GET", path, None));
6402            assert_eq!(
6403                status,
6404                StatusCode::UNAUTHORIZED,
6405                "{path} should 401 without token"
6406            );
6407        }
6408        h.shutdown(&runtime);
6409    }
6410
6411    // ---- Document endpoints (v0.7.0 P6) ----
6412    //
6413    // Wire-path coverage. The `Harness` here uses
6414    // `WriterActor::spawn_full` without an embedder — same shape as the
6415    // existing handler tests. Ingest/search would fail at the writer
6416    // boundary with "writer has no embedder", but every other path
6417    // (404s, malformed ids, route shape, bearer auth gating, OpenAPI
6418    // documentation) is exercisable. Real end-to-end ingest→search
6419    // round-trip lives in `mcp_smoke.rs` where a real subprocess runs
6420    // with a fully-wired writer.
6421
6422    #[test]
6423    fn list_documents_endpoint_returns_empty_array_on_empty_db() {
6424        let runtime = rt();
6425        let h = Harness::new(&runtime);
6426        let r = h.router.clone();
6427        let (status, body) = runtime.block_on(call(r, "GET", "/memory/documents", None));
6428        assert_eq!(status, StatusCode::OK);
6429        assert!(body.is_array(), "expected array, got {body}");
6430        assert_eq!(body.as_array().unwrap().len(), 0);
6431        h.shutdown(&runtime);
6432    }
6433
6434    #[test]
6435    fn list_documents_endpoint_parses_query_params() {
6436        let runtime = rt();
6437        let h = Harness::new(&runtime);
6438        let r = h.router.clone();
6439        let (status, body) = runtime.block_on(call(
6440            r,
6441            "GET",
6442            "/memory/documents?limit=5&offset=0&include_forgotten=true",
6443            None,
6444        ));
6445        assert_eq!(status, StatusCode::OK);
6446        assert!(body.is_array());
6447        h.shutdown(&runtime);
6448    }
6449
6450    #[test]
6451    fn ingest_document_endpoint_rejects_empty_path() {
6452        let runtime = rt();
6453        let h = Harness::new(&runtime);
6454        let r = h.router.clone();
6455        let (status, body) = runtime.block_on(call(
6456            r,
6457            "POST",
6458            "/memory/documents",
6459            Some(json!({ "path": "" })),
6460        ));
6461        assert_eq!(status, StatusCode::BAD_REQUEST);
6462        assert!(
6463            body.get("error")
6464                .and_then(|v| v.as_str())
6465                .is_some_and(|s| s.contains("path")),
6466            "expected error mentioning path, got {body}"
6467        );
6468        h.shutdown(&runtime);
6469    }
6470
6471    #[test]
6472    fn search_docs_endpoint_rejects_empty_query() {
6473        let runtime = rt();
6474        let h = Harness::new(&runtime);
6475        let r = h.router.clone();
6476        let (status, body) = runtime.block_on(call(
6477            r,
6478            "POST",
6479            "/memory/documents/search",
6480            Some(json!({ "query": "   " })),
6481        ));
6482        assert_eq!(status, StatusCode::BAD_REQUEST);
6483        assert!(
6484            body.get("error")
6485                .and_then(|v| v.as_str())
6486                .is_some_and(|s| s.contains("must not be empty")
6487                    || s.contains("doc_search")),
6488            "expected error mentioning empty query, got {body}"
6489        );
6490        h.shutdown(&runtime);
6491    }
6492
6493    #[test]
6494    fn inspect_document_endpoint_unknown_id_returns_404() {
6495        let runtime = rt();
6496        let h = Harness::new(&runtime);
6497        let r = h.router.clone();
6498        let (status, body) = runtime.block_on(call(
6499            r,
6500            "GET",
6501            "/memory/documents/00000000-0000-7000-8000-000000000000",
6502            None,
6503        ));
6504        assert_eq!(status, StatusCode::NOT_FOUND);
6505        assert!(body.get("error").is_some(), "got: {body}");
6506        h.shutdown(&runtime);
6507    }
6508
6509    #[test]
6510    fn inspect_document_endpoint_rejects_malformed_id() {
6511        let runtime = rt();
6512        let h = Harness::new(&runtime);
6513        let r = h.router.clone();
6514        let (status, _body) =
6515            runtime.block_on(call(r, "GET", "/memory/documents/not-a-uuid", None));
6516        assert_eq!(status, StatusCode::BAD_REQUEST);
6517        h.shutdown(&runtime);
6518    }
6519
6520    #[test]
6521    fn forget_document_endpoint_unknown_id_returns_404() {
6522        // Valid UUID format; no row exists → writer's `forget_document`
6523        // returns Error::NotFound → mapped to 404 by `ApiError::from`.
6524        let runtime = rt();
6525        let h = Harness::new(&runtime);
6526        let r = h.router.clone();
6527        let (status, _body) = runtime.block_on(call(
6528            r,
6529            "DELETE",
6530            "/memory/documents/00000000-0000-7000-8000-000000000000",
6531            None,
6532        ));
6533        assert_eq!(status, StatusCode::NOT_FOUND);
6534        h.shutdown(&runtime);
6535    }
6536
6537    #[test]
6538    fn forget_document_endpoint_rejects_malformed_id() {
6539        let runtime = rt();
6540        let h = Harness::new(&runtime);
6541        let r = h.router.clone();
6542        let (status, _body) =
6543            runtime.block_on(call(r, "DELETE", "/memory/documents/not-a-uuid", None));
6544        assert_eq!(status, StatusCode::BAD_REQUEST);
6545        h.shutdown(&runtime);
6546    }
6547
6548    #[test]
6549    fn document_endpoints_require_bearer_when_auth_enabled() {
6550        // All five doc endpoints sit behind the same authed Router and
6551        // must 401 without the bearer token. Mirrors
6552        // `derived_endpoints_require_bearer_when_auth_enabled`.
6553        let runtime = rt();
6554        let h = Harness::new_with_auth(&runtime, Some("doc-secret".to_string()));
6555        let cases: &[(&str, &str, Option<Value>)] = &[
6556            ("POST", "/memory/documents", Some(json!({ "path": "/x" }))),
6557            ("GET", "/memory/documents", None),
6558            (
6559                "POST",
6560                "/memory/documents/search",
6561                Some(json!({ "query": "x" })),
6562            ),
6563            (
6564                "GET",
6565                "/memory/documents/00000000-0000-7000-8000-000000000000",
6566                None,
6567            ),
6568            (
6569                "DELETE",
6570                "/memory/documents/00000000-0000-7000-8000-000000000000",
6571                None,
6572            ),
6573        ];
6574        for (method, path, body) in cases {
6575            let (status, _) =
6576                runtime.block_on(call(h.router.clone(), method, path, body.clone()));
6577            assert_eq!(
6578                status,
6579                StatusCode::UNAUTHORIZED,
6580                "{method} {path} should 401 without token"
6581            );
6582        }
6583        h.shutdown(&runtime);
6584    }
6585
6586    #[test]
6587    fn document_endpoints_accept_correct_bearer_token() {
6588        // Sanity check: with the right token, the same five endpoints
6589        // pass auth and reach the handler. We only assert that the
6590        // status code is NOT 401 — exact downstream behaviour depends
6591        // on the harness (no embedder → ingest/search would 500; empty
6592        // DB → list/inspect/forget return 200/404).
6593        let runtime = rt();
6594        let h = Harness::new_with_auth(&runtime, Some("doc-secret".to_string()));
6595        runtime.block_on(async {
6596            // GET /memory/documents → 200 + empty array (auth passes).
6597            let (status, _) = call_with_auth(
6598                h.router.clone(),
6599                "GET",
6600                "/memory/documents",
6601                None,
6602                Some("Bearer doc-secret"),
6603            )
6604            .await;
6605            assert_eq!(status, StatusCode::OK);
6606
6607            // GET /memory/documents/<unknown> → 404 (auth passes).
6608            let (status, _) = call_with_auth(
6609                h.router.clone(),
6610                "GET",
6611                "/memory/documents/00000000-0000-7000-8000-000000000000",
6612                None,
6613                Some("Bearer doc-secret"),
6614            )
6615            .await;
6616            assert_eq!(status, StatusCode::NOT_FOUND);
6617        });
6618        h.shutdown(&runtime);
6619    }
6620
6621    // ---------------------------------------------------------------------
6622    // v0.8.0 P2: tenant header extractor tests
6623    // ---------------------------------------------------------------------
6624
6625    /// `X-Solo-Tenant: default` resolves to the default tenant (which
6626    /// in the test harness is the only one wired in the registry).
6627    #[test]
6628    fn tenant_header_default_resolves() {
6629        let runtime = rt();
6630        let h = Harness::new(&runtime);
6631        let r = h.router.clone();
6632        let (status, _body) = runtime.block_on(async {
6633            let req = Request::builder()
6634                .method("GET")
6635                .uri("/memory/00000000-0000-7000-8000-000000000000")
6636                .header("x-solo-tenant", "default")
6637                .body(Body::empty())
6638                .unwrap();
6639            let resp = r.oneshot(req).await.expect("oneshot");
6640            let s = resp.status();
6641            let _b = resp.into_body().collect().await.unwrap().to_bytes();
6642            (s, _b)
6643        });
6644        // 404 because the id doesn't exist — but it's a routed 404 from
6645        // inspect_handler, not a 400 from a bad tenant header. That's
6646        // the proof point.
6647        assert_eq!(status, StatusCode::NOT_FOUND);
6648        h.shutdown(&runtime);
6649    }
6650
6651    /// `X-Solo-Tenant: UPPER` → 400 (invalid tenant id format).
6652    #[test]
6653    fn tenant_header_invalid_returns_400() {
6654        let runtime = rt();
6655        let h = Harness::new(&runtime);
6656        let r = h.router.clone();
6657        let (status, body) = runtime.block_on(async {
6658            let req = Request::builder()
6659                .method("GET")
6660                .uri("/memory/00000000-0000-7000-8000-000000000000")
6661                .header("x-solo-tenant", "UPPER")
6662                .body(Body::empty())
6663                .unwrap();
6664            let resp = r.oneshot(req).await.expect("oneshot");
6665            let s = resp.status();
6666            let bytes = resp.into_body().collect().await.unwrap().to_bytes();
6667            let v: Value = serde_json::from_slice(&bytes).unwrap_or(Value::Null);
6668            (s, v)
6669        });
6670        assert_eq!(status, StatusCode::BAD_REQUEST);
6671        let msg = body.get("error").and_then(|e| e.as_str()).unwrap_or("");
6672        assert!(
6673            msg.to_lowercase().contains("tenant") || msg.to_lowercase().contains("invalid"),
6674            "error must mention tenant/invalid: {msg}"
6675        );
6676        h.shutdown(&runtime);
6677    }
6678
6679    /// `X-Solo-Tenant: never-registered` → 404 (unknown tenant id).
6680    #[test]
6681    fn tenant_header_unknown_returns_404() {
6682        let runtime = rt();
6683        let h = Harness::new(&runtime);
6684        let r = h.router.clone();
6685        let (status, _body) = runtime.block_on(async {
6686            let req = Request::builder()
6687                .method("GET")
6688                .uri("/memory/00000000-0000-7000-8000-000000000000")
6689                .header("x-solo-tenant", "never-registered")
6690                .body(Body::empty())
6691                .unwrap();
6692            let resp = r.oneshot(req).await.expect("oneshot");
6693            let s = resp.status();
6694            let _b = resp.into_body().collect().await.unwrap().to_bytes();
6695            (s, _b)
6696        });
6697        assert_eq!(status, StatusCode::NOT_FOUND);
6698        h.shutdown(&runtime);
6699    }
6700
6701    /// No `X-Solo-Tenant` header → falls back to state.default_tenant.
6702    /// The reach-through to `inspect_handler` should produce the normal
6703    /// 404 for an unknown id rather than a tenant-routing error.
6704    #[test]
6705    fn tenant_header_missing_defaults_to_state_default_tenant() {
6706        let runtime = rt();
6707        let h = Harness::new(&runtime);
6708        let r = h.router.clone();
6709        let (status, _body) = runtime.block_on(async {
6710            let req = Request::builder()
6711                .method("GET")
6712                .uri("/memory/00000000-0000-7000-8000-000000000000")
6713                .body(Body::empty())
6714                .unwrap();
6715            let resp = r.oneshot(req).await.expect("oneshot");
6716            let s = resp.status();
6717            let _b = resp.into_body().collect().await.unwrap().to_bytes();
6718            (s, _b)
6719        });
6720        assert_eq!(status, StatusCode::NOT_FOUND);
6721        h.shutdown(&runtime);
6722    }
6723
6724    // ---------------------------------------------------------------------
6725    // v0.9.x: GET /v1/graph/expand
6726    //
6727    // Seeds tables directly via the Harness's side connection and walks
6728    // the four expansion kinds. The Harness is single-tenant (default);
6729    // the routing-isolation case is already covered by the
6730    // `tenant_header_*` tests above (an `X-Solo-Tenant: never-registered`
6731    // header against the same node_id surfaces 404 from the registry,
6732    // proving cross-tenant lookups can't bleed).
6733    // ---------------------------------------------------------------------
6734
6735    /// Insert one episode row directly. Returns its rowid for callers
6736    /// that need to wire `triples.source_episode_id`.
6737    fn seed_episode(
6738        conn: &rusqlite::Connection,
6739        memory_id: &str,
6740        ts_ms: i64,
6741        content: &str,
6742    ) -> i64 {
6743        conn.execute(
6744            "INSERT INTO episodes
6745                (memory_id, ts_ms, source_type, content,
6746                 encoding_context_json, tier, status,
6747                 confidence, strength, salience,
6748                 created_at_ms, updated_at_ms)
6749                VALUES (?1, ?2, 'user_message', ?3,
6750                        '{}', 'hot', 'active',
6751                        1.0, 0.5, 0.5, ?2, ?2)",
6752            rusqlite::params![memory_id, ts_ms, content],
6753        )
6754        .expect("seed episode");
6755        conn.last_insert_rowid()
6756    }
6757
6758    fn seed_cluster_row(conn: &rusqlite::Connection, cluster_id: &str, created_at_ms: i64) {
6759        conn.execute(
6760            "INSERT INTO clusters (cluster_id, coherence, created_at_ms)
6761                  VALUES (?1, 0.5, ?2)",
6762            rusqlite::params![cluster_id, created_at_ms],
6763        )
6764        .expect("seed cluster");
6765    }
6766
6767    fn seed_cluster_member(conn: &rusqlite::Connection, cluster_id: &str, memory_id: &str) {
6768        conn.execute(
6769            "INSERT INTO cluster_episodes (cluster_id, memory_id) VALUES (?1, ?2)",
6770            rusqlite::params![cluster_id, memory_id],
6771        )
6772        .expect("seed cluster_episodes");
6773    }
6774
6775    fn seed_document_row(conn: &rusqlite::Connection, doc_id: &str, title: &str) {
6776        conn.execute(
6777            "INSERT INTO documents
6778                (doc_id, source, title, mime_type, ingested_at_ms,
6779                 modified_at_ms, status, chunk_count, content_hash, byte_size)
6780                VALUES (?1, ?2, ?3, 'text/plain', 0, NULL,
6781                        'active', 0, ?1, NULL)",
6782            rusqlite::params![doc_id, format!("/tmp/{title}.txt"), title],
6783        )
6784        .expect("seed doc");
6785    }
6786
6787    fn seed_chunk_row(
6788        conn: &rusqlite::Connection,
6789        chunk_id: &str,
6790        doc_id: &str,
6791        chunk_index: i64,
6792        content: &str,
6793    ) {
6794        conn.execute(
6795            "INSERT INTO document_chunks
6796                (chunk_id, doc_id, chunk_index, content,
6797                 token_count, start_offset, end_offset, created_at_ms)
6798                VALUES (?1, ?2, ?3, ?4, 1, 0, ?5, 0)",
6799            rusqlite::params![chunk_id, doc_id, chunk_index, content, content.len() as i64],
6800        )
6801        .expect("seed chunk");
6802    }
6803
6804    fn seed_triple_row(
6805        conn: &rusqlite::Connection,
6806        triple_id: &str,
6807        subject: &str,
6808        predicate: &str,
6809        object: &str,
6810        source_episode_rowid: Option<i64>,
6811    ) {
6812        conn.execute(
6813            "INSERT INTO triples
6814                 (triple_id, subject_id, predicate, object_id, object_kind,
6815                  valid_from_ms, valid_to_ms, confidence, provenance_json,
6816                  status, created_at_ms, updated_at_ms, source_episode_id)
6817                 VALUES (?1, ?2, ?3, ?4, 'literal', 0, NULL, 0.9, '{}',
6818                         'active', 0, 0, ?5)",
6819            rusqlite::params![triple_id, subject, predicate, object, source_episode_rowid],
6820        )
6821        .expect("seed triple");
6822    }
6823
6824    /// Insert a `semantic_abstractions` row (cluster LLM summary). Used
6825    /// by the cluster-inspect test to verify the abstraction concat path.
6826    fn seed_abstraction_row(
6827        conn: &rusqlite::Connection,
6828        abstraction_id: &str,
6829        cluster_id: &str,
6830        content: &str,
6831    ) {
6832        conn.execute(
6833            "INSERT INTO semantic_abstractions
6834                 (abstraction_id, cluster_id, content, provenance_json,
6835                  confidence, created_at_ms)
6836                 VALUES (?1, ?2, ?3, '{}', 0.9, 0)",
6837            rusqlite::params![abstraction_id, cluster_id, content],
6838        )
6839        .expect("seed abstraction");
6840    }
6841
6842    /// Tests use simple ASCII node_ids (UUID-shaped + plain entity strings),
6843    /// so we percent-encode only `:` and a few other delimiters by hand.
6844    fn percent_encode_node_id(node_id: &str) -> String {
6845        let mut out = String::with_capacity(node_id.len());
6846        for c in node_id.chars() {
6847            match c {
6848                ':' => out.push_str("%3A"),
6849                ' ' => out.push_str("%20"),
6850                '&' => out.push_str("%26"),
6851                '+' => out.push_str("%2B"),
6852                '?' => out.push_str("%3F"),
6853                '#' => out.push_str("%23"),
6854                _ => out.push(c),
6855            }
6856        }
6857        out
6858    }
6859
6860    fn graph_uri(node_id: &str, kind: &str) -> String {
6861        let encoded = percent_encode_node_id(node_id);
6862        format!("/v1/graph/expand?node_id={encoded}&kind={kind}")
6863    }
6864
6865    fn graph_uri_with_limit(node_id: &str, kind: &str, limit: u32) -> String {
6866        let encoded = percent_encode_node_id(node_id);
6867        format!("/v1/graph/expand?node_id={encoded}&kind={kind}&limit={limit}")
6868    }
6869
6870    #[test]
6871    fn expand_cluster_member_from_episode_returns_clusters() {
6872        let runtime = rt();
6873        let h = Harness::new(&runtime);
6874        let memory_id = "11111111-1111-7000-8000-000000000001";
6875        {
6876            let conn = h.open_db();
6877            seed_episode(&conn, memory_id, 100, "ep content");
6878            seed_cluster_row(&conn, "cl-a", 200);
6879            seed_cluster_member(&conn, "cl-a", memory_id);
6880        }
6881        let node_id = format!("ep:{memory_id}");
6882        let (status, body) = runtime.block_on(call(
6883            h.router.clone(),
6884            "GET",
6885            &graph_uri(&node_id, "cluster_member"),
6886            None,
6887        ));
6888        assert_eq!(status, StatusCode::OK, "body: {body}");
6889        let nodes = body.get("nodes").and_then(|v| v.as_array()).expect("nodes array");
6890        let edges = body.get("edges").and_then(|v| v.as_array()).expect("edges array");
6891        assert_eq!(nodes.len(), 1, "{body}");
6892        assert_eq!(nodes[0]["id"], "cl:cl-a");
6893        assert_eq!(nodes[0]["kind"], "cluster");
6894        assert_eq!(edges.len(), 1);
6895        assert_eq!(edges[0]["source"], node_id);
6896        assert_eq!(edges[0]["target"], "cl:cl-a");
6897        assert_eq!(edges[0]["kind"], "cluster_member");
6898        h.shutdown(&runtime);
6899    }
6900
6901    #[test]
6902    fn expand_cluster_member_from_cluster_returns_episodes() {
6903        let runtime = rt();
6904        let h = Harness::new(&runtime);
6905        {
6906            let conn = h.open_db();
6907            seed_cluster_row(&conn, "cl-multi", 500);
6908            for i in 0..5 {
6909                let mid = format!("2222{i}222-2222-7000-8000-000000000001");
6910                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
6911                seed_cluster_member(&conn, "cl-multi", &mid);
6912            }
6913        }
6914        let (status, body) = runtime.block_on(call(
6915            h.router.clone(),
6916            "GET",
6917            &graph_uri_with_limit("cl:cl-multi", "cluster_member", 3),
6918            None,
6919        ));
6920        assert_eq!(status, StatusCode::OK, "body: {body}");
6921        let nodes = body["nodes"].as_array().unwrap();
6922        let edges = body["edges"].as_array().unwrap();
6923        assert_eq!(nodes.len(), 3, "limit honored: {body}");
6924        assert_eq!(edges.len(), 3);
6925        for n in nodes {
6926            assert_eq!(n["kind"], "episode");
6927        }
6928        h.shutdown(&runtime);
6929    }
6930
6931    #[test]
6932    fn expand_document_chunk_from_document_returns_chunks() {
6933        let runtime = rt();
6934        let h = Harness::new(&runtime);
6935        let doc_id = "33333333-3333-7000-8000-000000000001";
6936        {
6937            let conn = h.open_db();
6938            seed_document_row(&conn, doc_id, "doc A");
6939            // Insert chunks in shuffled order so the ORDER BY chunk_index
6940            // is load-bearing.
6941            seed_chunk_row(&conn, "c2", doc_id, 2, "chunk 2 text");
6942            seed_chunk_row(&conn, "c0", doc_id, 0, "chunk 0 text");
6943            seed_chunk_row(&conn, "c1", doc_id, 1, "chunk 1 text");
6944            seed_chunk_row(&conn, "c3", doc_id, 3, "chunk 3 text");
6945        }
6946        let node_id = format!("doc:{doc_id}");
6947        let (status, body) = runtime.block_on(call(
6948            h.router.clone(),
6949            "GET",
6950            &graph_uri(&node_id, "document_chunk"),
6951            None,
6952        ));
6953        assert_eq!(status, StatusCode::OK, "body: {body}");
6954        let nodes = body["nodes"].as_array().unwrap();
6955        let edges = body["edges"].as_array().unwrap();
6956        assert_eq!(nodes.len(), 4);
6957        assert_eq!(edges.len(), 4);
6958        // Verify in-order chunk_index emission.
6959        assert_eq!(nodes[0]["id"], "chunk:c0");
6960        assert_eq!(nodes[1]["id"], "chunk:c1");
6961        assert_eq!(nodes[2]["id"], "chunk:c2");
6962        assert_eq!(nodes[3]["id"], "chunk:c3");
6963        for e in edges {
6964            assert_eq!(e["kind"], "document_chunk");
6965        }
6966        h.shutdown(&runtime);
6967    }
6968
6969    #[test]
6970    fn expand_document_chunk_from_chunk_returns_parent_document() {
6971        let runtime = rt();
6972        let h = Harness::new(&runtime);
6973        let doc_id = "44444444-4444-7000-8000-000000000001";
6974        {
6975            let conn = h.open_db();
6976            seed_document_row(&conn, doc_id, "parent doc");
6977            seed_chunk_row(&conn, "c-orphan", doc_id, 0, "chunk content");
6978        }
6979        let (status, body) = runtime.block_on(call(
6980            h.router.clone(),
6981            "GET",
6982            &graph_uri("chunk:c-orphan", "document_chunk"),
6983            None,
6984        ));
6985        assert_eq!(status, StatusCode::OK, "body: {body}");
6986        let nodes = body["nodes"].as_array().unwrap();
6987        let edges = body["edges"].as_array().unwrap();
6988        assert_eq!(nodes.len(), 1);
6989        assert_eq!(edges.len(), 1);
6990        assert_eq!(nodes[0]["id"], format!("doc:{doc_id}"));
6991        assert_eq!(edges[0]["source"], "chunk:c-orphan");
6992        assert_eq!(edges[0]["target"], format!("doc:{doc_id}"));
6993        h.shutdown(&runtime);
6994    }
6995
6996    #[test]
6997    fn expand_triple_from_episode_returns_entities() {
6998        let runtime = rt();
6999        let h = Harness::new(&runtime);
7000        let memory_id = "55555555-5555-7000-8000-000000000001";
7001        let rowid;
7002        {
7003            let conn = h.open_db();
7004            rowid = seed_episode(&conn, memory_id, 100, "alice works at anthropic");
7005            // Two distinct triples → 4 entity endpoints (Alice, Anthropic, Bob, NYC).
7006            seed_triple_row(&conn, "t1", "Alice", "works_at", "Anthropic", Some(rowid));
7007            seed_triple_row(&conn, "t2", "Bob", "lives_in", "NYC", Some(rowid));
7008        }
7009        let node_id = format!("ep:{memory_id}");
7010        let (status, body) = runtime.block_on(call(
7011            h.router.clone(),
7012            "GET",
7013            &graph_uri(&node_id, "triple"),
7014            None,
7015        ));
7016        assert_eq!(status, StatusCode::OK, "body: {body}");
7017        let nodes = body["nodes"].as_array().unwrap();
7018        let edges = body["edges"].as_array().unwrap();
7019        assert_eq!(nodes.len(), 4, "expected 4 unique entity nodes: {body}");
7020        assert_eq!(edges.len(), 2);
7021        let ids: std::collections::HashSet<String> = nodes
7022            .iter()
7023            .map(|n| n["id"].as_str().unwrap().to_string())
7024            .collect();
7025        for expected in ["ent:Alice", "ent:Anthropic", "ent:Bob", "ent:NYC"] {
7026            assert!(ids.contains(expected), "missing {expected} in {body}");
7027        }
7028        for e in edges {
7029            assert_eq!(e["kind"], "triple");
7030            assert!(e["predicate"].is_string(), "predicate set: {body}");
7031        }
7032        h.shutdown(&runtime);
7033    }
7034
7035    #[test]
7036    fn expand_triple_from_entity_returns_episodes() {
7037        let runtime = rt();
7038        let h = Harness::new(&runtime);
7039        {
7040            let conn = h.open_db();
7041            let r1 = seed_episode(
7042                &conn,
7043                "66666666-6666-7000-8000-000000000001",
7044                100,
7045                "alice ep one",
7046            );
7047            let r2 = seed_episode(
7048                &conn,
7049                "66666666-6666-7000-8000-000000000002",
7050                200,
7051                "alice ep two",
7052            );
7053            let r3 = seed_episode(
7054                &conn,
7055                "66666666-6666-7000-8000-000000000003",
7056                300,
7057                "alice ep three",
7058            );
7059            // 3 triples all mentioning Alice on one side or another.
7060            seed_triple_row(&conn, "t1", "Alice", "p", "Bob", Some(r1));
7061            seed_triple_row(&conn, "t2", "Carol", "p", "Alice", Some(r2));
7062            seed_triple_row(&conn, "t3", "Alice", "q", "Dave", Some(r3));
7063            // One triple with no source — must be skipped by the IS NOT NULL filter.
7064            seed_triple_row(&conn, "t-orphan", "Alice", "p", "Eve", None);
7065        }
7066        let (status, body) = runtime.block_on(call(
7067            h.router.clone(),
7068            "GET",
7069            &graph_uri("ent:Alice", "triple"),
7070            None,
7071        ));
7072        assert_eq!(status, StatusCode::OK, "body: {body}");
7073        let nodes = body["nodes"].as_array().unwrap();
7074        let edges = body["edges"].as_array().unwrap();
7075        assert_eq!(nodes.len(), 3, "expected 3 episodes: {body}");
7076        assert_eq!(edges.len(), 3);
7077        for n in nodes {
7078            assert_eq!(n["kind"], "episode");
7079        }
7080        for e in edges {
7081            assert_eq!(e["source"], "ent:Alice");
7082            assert_eq!(e["kind"], "triple");
7083        }
7084        h.shutdown(&runtime);
7085    }
7086
7087    #[test]
7088    fn expand_semantic_from_episode_returns_similar() {
7089        let runtime = rt();
7090        let h = Harness::new(&runtime);
7091        // Seed three episodes via the writer-actor so they get embedded
7092        // + inserted into HNSW. StubEmbedder is deterministic: identical
7093        // content → identical vector → cos_distance = 0. So we use
7094        // distinct strings, then expand from one of them and assert at
7095        // least one similar peer comes back.
7096        runtime.block_on(async {
7097            let mid1 = post_remember(h.router.clone(), "alpha alpha alpha").await;
7098            let _mid2 = post_remember(h.router.clone(), "beta beta beta").await;
7099            let _mid3 = post_remember(h.router.clone(), "gamma gamma gamma").await;
7100            // Expand from mid1.
7101            let (status, body) = call(
7102                h.router.clone(),
7103                "GET",
7104                &graph_uri_with_limit(&format!("ep:{mid1}"), "semantic", 5),
7105                None,
7106            )
7107            .await;
7108            assert_eq!(status, StatusCode::OK, "body: {body}");
7109            let nodes = body["nodes"].as_array().unwrap();
7110            let edges = body["edges"].as_array().unwrap();
7111            // Must NOT include the source.
7112            for n in nodes {
7113                assert_ne!(
7114                    n["id"].as_str().unwrap(),
7115                    format!("ep:{mid1}"),
7116                    "self must be excluded: {body}"
7117                );
7118            }
7119            // Edges must be tagged semantic with a numeric weight.
7120            for e in edges {
7121                assert_eq!(e["kind"], "semantic");
7122                assert!(e["weight"].is_number(), "weight set: {body}");
7123            }
7124        });
7125        h.shutdown(&runtime);
7126    }
7127
7128    /// Helper: POST /memory and return the new memory_id.
7129    async fn post_remember(router: axum::Router, content: &str) -> String {
7130        let (status, body) = call(
7131            router,
7132            "POST",
7133            "/memory",
7134            Some(json!({ "content": content })),
7135        )
7136        .await;
7137        assert_eq!(status, StatusCode::OK, "post failed: {body}");
7138        body["memory_id"].as_str().unwrap().to_string()
7139    }
7140
7141    #[test]
7142    fn expand_400_on_invalid_kind() {
7143        let runtime = rt();
7144        let h = Harness::new(&runtime);
7145        let (status, _body) = runtime.block_on(call(
7146            h.router.clone(),
7147            "GET",
7148            "/v1/graph/expand?node_id=ep:any&kind=banana",
7149            None,
7150        ));
7151        // axum's Query extractor rejects unknown enum value with 400/422.
7152        assert!(
7153            status == StatusCode::BAD_REQUEST || status == StatusCode::UNPROCESSABLE_ENTITY,
7154            "expected 400/422 for bad kind, got {status}"
7155        );
7156        h.shutdown(&runtime);
7157    }
7158
7159    #[test]
7160    fn expand_400_on_invalid_node_for_kind() {
7161        let runtime = rt();
7162        let h = Harness::new(&runtime);
7163        // kind=semantic from a cluster source → 400.
7164        let (status, body) = runtime.block_on(call(
7165            h.router.clone(),
7166            "GET",
7167            &graph_uri("cl:doesnt-matter", "semantic"),
7168            None,
7169        ));
7170        assert_eq!(status, StatusCode::BAD_REQUEST);
7171        assert!(
7172            body["error"]
7173                .as_str()
7174                .is_some_and(|s| s.contains("semantic only valid for episode")),
7175            "got: {body}"
7176        );
7177        h.shutdown(&runtime);
7178    }
7179
7180    #[test]
7181    fn expand_404_on_missing_node_id() {
7182        let runtime = rt();
7183        let h = Harness::new(&runtime);
7184        let (status, body) = runtime.block_on(call(
7185            h.router.clone(),
7186            "GET",
7187            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
7188            None,
7189        ));
7190        assert_eq!(status, StatusCode::NOT_FOUND, "{body}");
7191        h.shutdown(&runtime);
7192    }
7193
7194    #[test]
7195    fn expand_limit_clamped_at_100() {
7196        let runtime = rt();
7197        let h = Harness::new(&runtime);
7198        // Seed > 100 cluster members so we can see the clamp in action.
7199        {
7200            let conn = h.open_db();
7201            seed_cluster_row(&conn, "cl-huge", 1_000);
7202            for i in 0..150 {
7203                let mid = format!("77777777-7777-7000-8000-{:012}", i);
7204                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
7205                seed_cluster_member(&conn, "cl-huge", &mid);
7206            }
7207        }
7208        let (status, body) = runtime.block_on(call(
7209            h.router.clone(),
7210            "GET",
7211            &graph_uri_with_limit("cl:cl-huge", "cluster_member", 999),
7212            None,
7213        ));
7214        assert_eq!(status, StatusCode::OK, "body: {body}");
7215        let nodes = body["nodes"].as_array().unwrap();
7216        assert_eq!(
7217            nodes.len(),
7218            100,
7219            "limit must be silently clamped to 100, got {}",
7220            nodes.len()
7221        );
7222        h.shutdown(&runtime);
7223    }
7224
7225    #[test]
7226    fn expand_bad_node_id_prefix_returns_400() {
7227        let runtime = rt();
7228        let h = Harness::new(&runtime);
7229        let (status, body) = runtime.block_on(call(
7230            h.router.clone(),
7231            "GET",
7232            "/v1/graph/expand?node_id=garbage&kind=cluster_member",
7233            None,
7234        ));
7235        assert_eq!(status, StatusCode::BAD_REQUEST);
7236        assert!(
7237            body["error"]
7238                .as_str()
7239                .is_some_and(|s| s.contains("node_id must be")),
7240            "got: {body}"
7241        );
7242        h.shutdown(&runtime);
7243    }
7244
7245    #[test]
7246    fn expand_respects_tenant_scoping_via_unknown_tenant_header() {
7247        // Routing via X-Solo-Tenant: a header pointing to an unknown
7248        // tenant must 404 before the handler even runs — the
7249        // TenantExtractor is the gatekeeper, so node ids can't be
7250        // resolved against the wrong tenant's DB.
7251        let runtime = rt();
7252        let h = Harness::new(&runtime);
7253        // Seed a real episode in the default tenant so we know it
7254        // exists there. If tenant scoping leaked, this lookup would 200
7255        // even with the wrong tenant header.
7256        let memory_id = "88888888-8888-7000-8000-000000000001";
7257        {
7258            let conn = h.open_db();
7259            seed_episode(&conn, memory_id, 100, "scoped");
7260            seed_cluster_row(&conn, "cl-scoped", 200);
7261            seed_cluster_member(&conn, "cl-scoped", memory_id);
7262        }
7263        let node_id = format!("ep:{memory_id}");
7264        let r = h.router.clone();
7265        let (status, _body) = runtime.block_on(async {
7266            let req = Request::builder()
7267                .method("GET")
7268                .uri(graph_uri(&node_id, "cluster_member"))
7269                .header("x-solo-tenant", "never-registered-tenant")
7270                .body(Body::empty())
7271                .unwrap();
7272            let resp = r.oneshot(req).await.expect("oneshot");
7273            let s = resp.status();
7274            let _b = resp.into_body().collect().await.unwrap().to_bytes();
7275            (s, _b)
7276        });
7277        // Unknown tenant id → 404 from the registry. Confirms cross-tenant
7278        // lookups can't smuggle through this endpoint.
7279        assert_eq!(status, StatusCode::NOT_FOUND);
7280        h.shutdown(&runtime);
7281    }
7282
7283    #[test]
7284    fn expand_respects_auth_when_enabled() {
7285        let runtime = rt();
7286        let h = Harness::new_with_auth(&runtime, Some("graph-secret".into()));
7287        // No Authorization header → 401.
7288        let (status, _) = runtime.block_on(call(
7289            h.router.clone(),
7290            "GET",
7291            &graph_uri("ep:any", "cluster_member"),
7292            None,
7293        ));
7294        assert_eq!(status, StatusCode::UNAUTHORIZED);
7295        // Right token → handler runs (404 for unknown node, NOT 401).
7296        let (status, _) = runtime.block_on(call_with_auth(
7297            h.router.clone(),
7298            "GET",
7299            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
7300            None,
7301            Some("Bearer graph-secret"),
7302        ));
7303        assert_eq!(status, StatusCode::NOT_FOUND);
7304        h.shutdown(&runtime);
7305    }
7306
7307    #[test]
7308    fn expand_works_when_auth_none() {
7309        let runtime = rt();
7310        let h = Harness::new(&runtime);
7311        // Unauthenticated request hits the handler; 404 for unknown node
7312        // proves the auth-none path doesn't reject the request.
7313        let (status, _) = runtime.block_on(call(
7314            h.router.clone(),
7315            "GET",
7316            &graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member"),
7317            None,
7318        ));
7319        assert_eq!(status, StatusCode::NOT_FOUND);
7320        h.shutdown(&runtime);
7321    }
7322
7323    // ---------------------------------------------------------------------
7324    // v0.10.0: GET /v1/graph/nodes + GET /v1/graph/edges
7325    //
7326    // Paginated catalog reads. Both endpoints share auth + tenant +
7327    // cursor scaffolding from /v1/graph/expand, so tests focus on the
7328    // new surface: filter parsing, entity synthesis cap, cursor round-
7329    // trip, edge-type defaults (semantic excluded), and the semantic
7330    // 400 redirect to /v1/graph/neighbors.
7331    // ---------------------------------------------------------------------
7332
7333    /// Lower-level helper that captures response headers in addition to
7334    /// status + JSON body. Used by the entity-cap header test.
7335    async fn call_with_headers(
7336        router: axum::Router,
7337        method: &str,
7338        uri: &str,
7339    ) -> (StatusCode, axum::http::HeaderMap, Value) {
7340        let req = Request::builder()
7341            .method(method)
7342            .uri(uri)
7343            .header("content-length", "0")
7344            .body(Body::empty())
7345            .unwrap();
7346        let resp = router.oneshot(req).await.expect("oneshot");
7347        let status = resp.status();
7348        let headers = resp.headers().clone();
7349        let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
7350        let v: Value = if body_bytes.is_empty() {
7351            Value::Null
7352        } else {
7353            serde_json::from_slice(&body_bytes).unwrap_or(Value::Null)
7354        };
7355        (status, headers, v)
7356    }
7357
7358    #[test]
7359    fn nodes_returns_all_kinds_when_no_filter() {
7360        let runtime = rt();
7361        let h = Harness::new(&runtime);
7362        {
7363            let conn = h.open_db();
7364            let rowid = seed_episode(
7365                &conn,
7366                "aaaaaaaa-0000-7000-8000-000000000001",
7367                100,
7368                "episode one",
7369            );
7370            seed_document_row(&conn, "doc-1", "doc one");
7371            seed_chunk_row(&conn, "chunk-1", "doc-1", 0, "chunk one body");
7372            seed_cluster_row(&conn, "cl-one", 200);
7373            seed_triple_row(
7374                &conn,
7375                "t-one",
7376                "Alice",
7377                "knows",
7378                "Bob",
7379                Some(rowid),
7380            );
7381        }
7382        let (status, body) = runtime.block_on(call(
7383            h.router.clone(),
7384            "GET",
7385            "/v1/graph/nodes",
7386            None,
7387        ));
7388        assert_eq!(status, StatusCode::OK, "body: {body}");
7389        let nodes = body["nodes"].as_array().unwrap();
7390        let kinds: std::collections::HashSet<&str> = nodes
7391            .iter()
7392            .map(|n| n["kind"].as_str().unwrap())
7393            .collect();
7394        for expected in ["episode", "document", "chunk", "cluster", "entity"] {
7395            assert!(
7396                kinds.contains(expected),
7397                "expected {expected} kind in response: {body}"
7398            );
7399        }
7400        h.shutdown(&runtime);
7401    }
7402
7403    #[test]
7404    fn nodes_filter_by_single_kind() {
7405        let runtime = rt();
7406        let h = Harness::new(&runtime);
7407        {
7408            let conn = h.open_db();
7409            seed_episode(&conn, "bbbbbbbb-0000-7000-8000-000000000001", 100, "ep");
7410            seed_document_row(&conn, "doc-only", "d");
7411            seed_cluster_row(&conn, "cl-only", 300);
7412        }
7413        let (status, body) = runtime.block_on(call(
7414            h.router.clone(),
7415            "GET",
7416            "/v1/graph/nodes?kind=episode",
7417            None,
7418        ));
7419        assert_eq!(status, StatusCode::OK, "body: {body}");
7420        let nodes = body["nodes"].as_array().unwrap();
7421        assert!(!nodes.is_empty(), "{body}");
7422        for n in nodes {
7423            assert_eq!(n["kind"], "episode", "kind filter must be exclusive: {body}");
7424        }
7425        h.shutdown(&runtime);
7426    }
7427
7428    #[test]
7429    fn nodes_filter_by_multiple_kinds() {
7430        let runtime = rt();
7431        let h = Harness::new(&runtime);
7432        {
7433            let conn = h.open_db();
7434            seed_episode(&conn, "cccccccc-0000-7000-8000-000000000001", 100, "ep");
7435            seed_document_row(&conn, "doc-multi", "d");
7436            seed_cluster_row(&conn, "cl-multi", 300);
7437        }
7438        let (status, body) = runtime.block_on(call(
7439            h.router.clone(),
7440            "GET",
7441            "/v1/graph/nodes?kind=episode,document",
7442            None,
7443        ));
7444        assert_eq!(status, StatusCode::OK, "body: {body}");
7445        let nodes = body["nodes"].as_array().unwrap();
7446        let kinds: std::collections::HashSet<&str> = nodes
7447            .iter()
7448            .map(|n| n["kind"].as_str().unwrap())
7449            .collect();
7450        assert!(kinds.contains("episode"), "{body}");
7451        assert!(kinds.contains("document"), "{body}");
7452        assert!(
7453            !kinds.contains("cluster"),
7454            "cluster must be filtered out: {body}"
7455        );
7456        h.shutdown(&runtime);
7457    }
7458
7459    #[test]
7460    fn nodes_entity_synthesis_caps_at_200() {
7461        let runtime = rt();
7462        let h = Harness::new(&runtime);
7463        {
7464            let conn = h.open_db();
7465            // Seed one episode + 250 distinct triple object values so the
7466            // entity rollup surfaces >200 entities. ref_count is 1 for
7467            // each; pick subject = "Alice" for all so the entity count
7468            // collapses on subject (1 "Alice") + 250 distinct objects.
7469            let rowid = seed_episode(
7470                &conn,
7471                "dddddddd-0000-7000-8000-000000000001",
7472                100,
7473                "ep",
7474            );
7475            for i in 0..250 {
7476                let triple_id = format!("t-cap-{i:03}");
7477                let obj = format!("Entity{i:03}");
7478                seed_triple_row(&conn, &triple_id, "Alice", "knows", &obj, Some(rowid));
7479            }
7480        }
7481        let (status, headers, body) = runtime.block_on(call_with_headers(
7482            h.router.clone(),
7483            "GET",
7484            "/v1/graph/nodes?kind=entity&limit=500",
7485        ));
7486        assert_eq!(status, StatusCode::OK, "body: {body}");
7487        let nodes = body["nodes"].as_array().unwrap();
7488        assert_eq!(
7489            nodes.len(),
7490            200,
7491            "entity cap must be enforced at 200, got {}",
7492            nodes.len()
7493        );
7494        assert_eq!(
7495            headers
7496                .get("x-solo-entity-cap-reached")
7497                .and_then(|v| v.to_str().ok()),
7498            Some("true"),
7499            "cap-reached header missing: headers={headers:?}"
7500        );
7501        for n in nodes {
7502            assert_eq!(n["kind"], "entity");
7503        }
7504        h.shutdown(&runtime);
7505    }
7506
7507    #[test]
7508    fn nodes_since_until_filter_works() {
7509        let runtime = rt();
7510        let h = Harness::new(&runtime);
7511        {
7512            let conn = h.open_db();
7513            seed_episode(
7514                &conn,
7515                "eeeeeeee-0000-7000-8000-000000000001",
7516                100,
7517                "early",
7518            );
7519            seed_episode(
7520                &conn,
7521                "eeeeeeee-0000-7000-8000-000000000002",
7522                500,
7523                "middle",
7524            );
7525            seed_episode(
7526                &conn,
7527                "eeeeeeee-0000-7000-8000-000000000003",
7528                1000,
7529                "late",
7530            );
7531        }
7532        let (status, body) = runtime.block_on(call(
7533            h.router.clone(),
7534            "GET",
7535            "/v1/graph/nodes?kind=episode&since_ms=400&until_ms=600",
7536            None,
7537        ));
7538        assert_eq!(status, StatusCode::OK, "body: {body}");
7539        let nodes = body["nodes"].as_array().unwrap();
7540        assert_eq!(nodes.len(), 1, "{body}");
7541        assert_eq!(
7542            nodes[0]["id"],
7543            "ep:eeeeeeee-0000-7000-8000-000000000002"
7544        );
7545        h.shutdown(&runtime);
7546    }
7547
7548    #[test]
7549    fn nodes_pagination_round_trip() {
7550        let runtime = rt();
7551        let h = Harness::new(&runtime);
7552        {
7553            let conn = h.open_db();
7554            for i in 0..150 {
7555                let mid = format!("f0000000-0000-7000-8000-{i:012}");
7556                // ts_ms scales with i so the sort order is deterministic;
7557                // newest (highest i) appears first.
7558                seed_episode(&conn, &mid, 1_000 + i as i64, "page");
7559            }
7560        }
7561        let limit = 50u32;
7562        let mut seen: std::collections::HashSet<String> = Default::default();
7563        let mut next_cursor: Option<String> = None;
7564        for page_idx in 0..4 {
7565            let cursor_param = next_cursor
7566                .as_deref()
7567                .map(|c| format!("&cursor={c}"))
7568                .unwrap_or_default();
7569            let uri = format!(
7570                "/v1/graph/nodes?kind=episode&limit={limit}{cursor_param}"
7571            );
7572            let (status, body) =
7573                runtime.block_on(call(h.router.clone(), "GET", &uri, None));
7574            assert_eq!(status, StatusCode::OK, "page {page_idx}: {body}");
7575            let nodes = body["nodes"].as_array().unwrap();
7576            assert!(
7577                nodes.len() <= limit as usize,
7578                "page {page_idx} over-fetched: {body}"
7579            );
7580            for n in nodes {
7581                let id = n["id"].as_str().unwrap().to_string();
7582                assert!(seen.insert(id.clone()), "duplicate id across pages: {id}");
7583            }
7584            next_cursor = body
7585                .get("next_cursor")
7586                .and_then(|v| v.as_str())
7587                .map(|s| s.to_string());
7588            if next_cursor.is_none() {
7589                break;
7590            }
7591        }
7592        assert_eq!(
7593            seen.len(),
7594            150,
7595            "expected 150 distinct ids across pages, got {}",
7596            seen.len()
7597        );
7598        assert!(
7599            next_cursor.is_none(),
7600            "cursor should be null after last page; got {next_cursor:?}"
7601        );
7602        h.shutdown(&runtime);
7603    }
7604
7605    #[test]
7606    fn nodes_respects_tenant_scoping() {
7607        let runtime = rt();
7608        let h = Harness::new(&runtime);
7609        {
7610            let conn = h.open_db();
7611            seed_episode(
7612                &conn,
7613                "11110000-0000-7000-8000-000000000001",
7614                100,
7615                "tenant scope",
7616            );
7617        }
7618        // Request against a never-registered tenant header → 404 from
7619        // the tenant extractor before the handler runs.
7620        let r = h.router.clone();
7621        let (status, _body) = runtime.block_on(async {
7622            let req = Request::builder()
7623                .method("GET")
7624                .uri("/v1/graph/nodes")
7625                .header("x-solo-tenant", "never-registered-tenant")
7626                .body(Body::empty())
7627                .unwrap();
7628            let resp = r.oneshot(req).await.expect("oneshot");
7629            let s = resp.status();
7630            let _b = resp.into_body().collect().await.unwrap().to_bytes();
7631            (s, _b)
7632        });
7633        assert_eq!(status, StatusCode::NOT_FOUND);
7634        h.shutdown(&runtime);
7635    }
7636
7637    #[test]
7638    fn nodes_respects_auth_when_enabled() {
7639        let runtime = rt();
7640        let h = Harness::new_with_auth(&runtime, Some("nodes-secret".into()));
7641        let (status, _) = runtime.block_on(call(
7642            h.router.clone(),
7643            "GET",
7644            "/v1/graph/nodes",
7645            None,
7646        ));
7647        assert_eq!(
7648            status,
7649            StatusCode::UNAUTHORIZED,
7650            "must reject unauthenticated request"
7651        );
7652        let (status, _) = runtime.block_on(call_with_auth(
7653            h.router.clone(),
7654            "GET",
7655            "/v1/graph/nodes",
7656            None,
7657            Some("Bearer nodes-secret"),
7658        ));
7659        assert_eq!(status, StatusCode::OK, "must pass through with bearer");
7660        h.shutdown(&runtime);
7661    }
7662
7663    #[test]
7664    fn nodes_works_with_auth_none() {
7665        let runtime = rt();
7666        let h = Harness::new(&runtime);
7667        let (status, body) = runtime.block_on(call(
7668            h.router.clone(),
7669            "GET",
7670            "/v1/graph/nodes",
7671            None,
7672        ));
7673        assert_eq!(status, StatusCode::OK, "{body}");
7674        assert!(body.get("nodes").is_some());
7675        h.shutdown(&runtime);
7676    }
7677
7678    // --- /v1/graph/edges ---
7679
7680    #[test]
7681    fn edges_returns_all_default_kinds() {
7682        let runtime = rt();
7683        let h = Harness::new(&runtime);
7684        {
7685            let conn = h.open_db();
7686            let rowid = seed_episode(
7687                &conn,
7688                "22220000-0000-7000-8000-000000000001",
7689                100,
7690                "ep src",
7691            );
7692            seed_triple_row(&conn, "t-def", "Alice", "knows", "Bob", Some(rowid));
7693            seed_document_row(&conn, "doc-e", "doc");
7694            seed_chunk_row(&conn, "c-e", "doc-e", 0, "chunk");
7695            seed_cluster_row(&conn, "cl-e", 200);
7696            seed_cluster_member(
7697                &conn,
7698                "cl-e",
7699                "22220000-0000-7000-8000-000000000001",
7700            );
7701        }
7702        let (status, body) = runtime.block_on(call(
7703            h.router.clone(),
7704            "GET",
7705            "/v1/graph/edges",
7706            None,
7707        ));
7708        assert_eq!(status, StatusCode::OK, "body: {body}");
7709        let edges = body["edges"].as_array().unwrap();
7710        let kinds: std::collections::HashSet<&str> = edges
7711            .iter()
7712            .map(|e| e["kind"].as_str().unwrap())
7713            .collect();
7714        assert!(kinds.contains("triple"), "{body}");
7715        assert!(kinds.contains("document_chunk"), "{body}");
7716        assert!(kinds.contains("cluster_member"), "{body}");
7717        assert!(
7718            !kinds.contains("semantic"),
7719            "semantic is NOT in default response: {body}"
7720        );
7721        h.shutdown(&runtime);
7722    }
7723
7724    #[test]
7725    fn edges_filter_by_node_id_finds_incident_edges() {
7726        let runtime = rt();
7727        let h = Harness::new(&runtime);
7728        let memory_id = "33330000-0000-7000-8000-000000000001";
7729        {
7730            let conn = h.open_db();
7731            let rowid = seed_episode(&conn, memory_id, 100, "ep multi-triple");
7732            seed_triple_row(&conn, "t-a", "Alice", "p", "Bob", Some(rowid));
7733            seed_triple_row(&conn, "t-b", "Alice", "p", "Carol", Some(rowid));
7734            seed_triple_row(&conn, "t-c", "Alice", "p", "Dave", Some(rowid));
7735            // Decoy episode with its own triple — must NOT come back.
7736            let decoy_rowid = seed_episode(
7737                &conn,
7738                "33330000-0000-7000-8000-000000000999",
7739                200,
7740                "decoy",
7741            );
7742            seed_triple_row(
7743                &conn,
7744                "t-decoy",
7745                "Alice",
7746                "p",
7747                "Eve",
7748                Some(decoy_rowid),
7749            );
7750        }
7751        let uri = format!(
7752            "/v1/graph/edges?type=triple&node_id={}",
7753            percent_encode_node_id(&format!("ep:{memory_id}"))
7754        );
7755        let (status, body) =
7756            runtime.block_on(call(h.router.clone(), "GET", &uri, None));
7757        assert_eq!(status, StatusCode::OK, "body: {body}");
7758        let edges = body["edges"].as_array().unwrap();
7759        assert_eq!(edges.len(), 3, "expected 3 incident edges: {body}");
7760        for e in edges {
7761            assert_eq!(e["source"], format!("ep:{memory_id}"));
7762            assert_eq!(e["kind"], "triple");
7763        }
7764        h.shutdown(&runtime);
7765    }
7766
7767    #[test]
7768    fn edges_filter_by_type_works() {
7769        let runtime = rt();
7770        let h = Harness::new(&runtime);
7771        {
7772            let conn = h.open_db();
7773            let rowid = seed_episode(
7774                &conn,
7775                "44440000-0000-7000-8000-000000000001",
7776                100,
7777                "ep",
7778            );
7779            seed_triple_row(&conn, "t-only", "Alice", "p", "Bob", Some(rowid));
7780            seed_document_row(&conn, "doc-skip", "doc");
7781            seed_chunk_row(&conn, "c-skip", "doc-skip", 0, "chunk");
7782        }
7783        let (status, body) = runtime.block_on(call(
7784            h.router.clone(),
7785            "GET",
7786            "/v1/graph/edges?type=triple",
7787            None,
7788        ));
7789        assert_eq!(status, StatusCode::OK, "{body}");
7790        let edges = body["edges"].as_array().unwrap();
7791        assert!(!edges.is_empty(), "{body}");
7792        for e in edges {
7793            assert_eq!(e["kind"], "triple", "{body}");
7794        }
7795        h.shutdown(&runtime);
7796    }
7797
7798    #[test]
7799    fn edges_rejects_semantic_type_with_400() {
7800        let runtime = rt();
7801        let h = Harness::new(&runtime);
7802        let (status, body) = runtime.block_on(call(
7803            h.router.clone(),
7804            "GET",
7805            "/v1/graph/edges?type=semantic",
7806            None,
7807        ));
7808        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
7809        let err = body["error"].as_str().unwrap_or_default();
7810        assert!(
7811            err.contains("/v1/graph/neighbors"),
7812            "error must point to /v1/graph/neighbors: {body}"
7813        );
7814        h.shutdown(&runtime);
7815    }
7816
7817    #[test]
7818    fn edges_pagination_round_trip() {
7819        let runtime = rt();
7820        let h = Harness::new(&runtime);
7821        {
7822            let conn = h.open_db();
7823            let rowid = seed_episode(
7824                &conn,
7825                "55550000-0000-7000-8000-000000000001",
7826                100,
7827                "ep big",
7828            );
7829            // 60 triples → 60 triple edges. limit=25 → 3 pages.
7830            for i in 0..60 {
7831                let tid = format!("t-page-{i:03}");
7832                let obj = format!("Obj{i:03}");
7833                seed_triple_row(&conn, &tid, "Alice", "p", &obj, Some(rowid));
7834            }
7835        }
7836        let limit = 25u32;
7837        let mut seen: std::collections::HashSet<String> = Default::default();
7838        let mut next_cursor: Option<String> = None;
7839        for page_idx in 0..5 {
7840            let cursor_param = next_cursor
7841                .as_deref()
7842                .map(|c| format!("&cursor={c}"))
7843                .unwrap_or_default();
7844            let uri = format!(
7845                "/v1/graph/edges?type=triple&limit={limit}{cursor_param}"
7846            );
7847            let (status, body) =
7848                runtime.block_on(call(h.router.clone(), "GET", &uri, None));
7849            assert_eq!(status, StatusCode::OK, "page {page_idx}: {body}");
7850            let edges = body["edges"].as_array().unwrap();
7851            for e in edges {
7852                let id = e["id"].as_str().unwrap().to_string();
7853                assert!(seen.insert(id.clone()), "duplicate edge id: {id}");
7854            }
7855            next_cursor = body
7856                .get("next_cursor")
7857                .and_then(|v| v.as_str())
7858                .map(|s| s.to_string());
7859            if next_cursor.is_none() {
7860                break;
7861            }
7862        }
7863        assert_eq!(
7864            seen.len(),
7865            60,
7866            "expected 60 distinct edges, got {}",
7867            seen.len()
7868        );
7869        assert!(next_cursor.is_none(), "expected exhausted cursor");
7870        h.shutdown(&runtime);
7871    }
7872
7873    #[test]
7874    fn edges_respects_tenant_scoping() {
7875        let runtime = rt();
7876        let h = Harness::new(&runtime);
7877        {
7878            let conn = h.open_db();
7879            let rowid = seed_episode(
7880                &conn,
7881                "66660000-0000-7000-8000-000000000001",
7882                100,
7883                "ep",
7884            );
7885            seed_triple_row(&conn, "t-tenant", "Alice", "p", "Bob", Some(rowid));
7886        }
7887        let r = h.router.clone();
7888        let (status, _) = runtime.block_on(async {
7889            let req = Request::builder()
7890                .method("GET")
7891                .uri("/v1/graph/edges")
7892                .header("x-solo-tenant", "never-registered-tenant")
7893                .body(Body::empty())
7894                .unwrap();
7895            let resp = r.oneshot(req).await.expect("oneshot");
7896            let s = resp.status();
7897            let _b = resp.into_body().collect().await.unwrap().to_bytes();
7898            (s, _b)
7899        });
7900        assert_eq!(status, StatusCode::NOT_FOUND);
7901        h.shutdown(&runtime);
7902    }
7903
7904    #[test]
7905    fn edges_respects_auth_when_enabled() {
7906        let runtime = rt();
7907        let h = Harness::new_with_auth(&runtime, Some("edges-secret".into()));
7908        let (status, _) = runtime.block_on(call(
7909            h.router.clone(),
7910            "GET",
7911            "/v1/graph/edges",
7912            None,
7913        ));
7914        assert_eq!(status, StatusCode::UNAUTHORIZED);
7915        let (status, _) = runtime.block_on(call_with_auth(
7916            h.router.clone(),
7917            "GET",
7918            "/v1/graph/edges",
7919            None,
7920            Some("Bearer edges-secret"),
7921        ));
7922        assert_eq!(status, StatusCode::OK);
7923        h.shutdown(&runtime);
7924    }
7925
7926    // ---------------------------------------------------------------------
7927    // v0.10.0: GET /v1/graph/inspect/{id}
7928    //
7929    // Kind-discriminated full-record drill. Shares auth + tenant + node-id
7930    // prefix scaffolding with /v1/graph/expand and /v1/graph/{nodes,edges},
7931    // so tests focus on the new surface: per-kind full_text source +
7932    // triples_in/out shape + entity zero-triple 404 semantics + the
7933    // standard 400/404/auth/tenant cases.
7934    // ---------------------------------------------------------------------
7935
7936    fn inspect_uri(node_id: &str) -> String {
7937        // Path parameter must be percent-encoded (`:` is `%3A` after
7938        // the URI parser splits segments). axum's Path<String>
7939        // extractor percent-decodes automatically.
7940        format!("/v1/graph/inspect/{}", percent_encode_node_id(node_id))
7941    }
7942
7943    #[test]
7944    fn inspect_episode_returns_full_text_plus_triples_out() {
7945        let runtime = rt();
7946        let h = Harness::new(&runtime);
7947        let memory_id = "a1110000-0000-7000-8000-000000000001";
7948        let full_text = "Met Alice for coffee at the new place. She mentioned the project is on track but they're hitting issues with the deploy pipeline.";
7949        {
7950            let conn = h.open_db();
7951            let rowid = seed_episode(&conn, memory_id, 1_715_625_600_000, full_text);
7952            seed_triple_row(&conn, "t-ep-1", "user", "met_with", "Alice", Some(rowid));
7953            seed_triple_row(&conn, "t-ep-2", "user", "discussed", "deploy_pipeline", Some(rowid));
7954            seed_triple_row(&conn, "t-ep-3", "Alice", "works_on", "project", Some(rowid));
7955        }
7956        let (status, body) = runtime.block_on(call(
7957            h.router.clone(),
7958            "GET",
7959            &inspect_uri(&format!("ep:{memory_id}")),
7960            None,
7961        ));
7962        assert_eq!(status, StatusCode::OK, "body: {body}");
7963        assert_eq!(body["node"]["kind"], "episode");
7964        assert_eq!(body["node"]["id"], format!("ep:{memory_id}"));
7965        assert_eq!(
7966            body["full_text"].as_str().unwrap(),
7967            full_text,
7968            "full_text must match episodes.content verbatim, untruncated"
7969        );
7970        let triples_out = body["triples_out"].as_array().unwrap();
7971        assert_eq!(triples_out.len(), 3, "{body}");
7972        let triples_in = body["triples_in"].as_array().unwrap();
7973        assert!(triples_in.is_empty(), "episodes have no triples_in: {body}");
7974        for e in triples_out {
7975            assert_eq!(e["kind"], "triple");
7976            assert_eq!(e["source"], format!("ep:{memory_id}"));
7977            assert!(e["target"].as_str().unwrap().starts_with("ent:"));
7978            assert!(e["predicate"].as_str().is_some());
7979            assert!(e["weight"].as_f64().is_some());
7980        }
7981        h.shutdown(&runtime);
7982    }
7983
7984    #[test]
7985    fn inspect_episode_triples_in_is_empty_for_v10p1() {
7986        // Seed an episode + a triple from a DIFFERENT episode that
7987        // happens to mention the focal episode's content. Even with
7988        // entities referencing the episode topic, episode.triples_in
7989        // is structurally empty in v0.10.0 P1.
7990        let runtime = rt();
7991        let h = Harness::new(&runtime);
7992        let focal = "a2220000-0000-7000-8000-000000000001";
7993        let other = "a2220000-0000-7000-8000-000000000002";
7994        {
7995            let conn = h.open_db();
7996            seed_episode(&conn, focal, 100, "focal episode body");
7997            let other_rowid = seed_episode(&conn, other, 200, "another episode");
7998            // Entity "user" gets referenced heavily; doesn't matter --
7999            // episode triples_in stays empty.
8000            for i in 0..5 {
8001                let tid = format!("t-other-{i}");
8002                seed_triple_row(&conn, &tid, "user", "did", "thing", Some(other_rowid));
8003            }
8004        }
8005        let (status, body) = runtime.block_on(call(
8006            h.router.clone(),
8007            "GET",
8008            &inspect_uri(&format!("ep:{focal}")),
8009            None,
8010        ));
8011        assert_eq!(status, StatusCode::OK, "body: {body}");
8012        let triples_in = body["triples_in"].as_array().unwrap();
8013        assert!(
8014            triples_in.is_empty(),
8015            "episode triples_in must be empty regardless of cross-episode entity references: {body}"
8016        );
8017        h.shutdown(&runtime);
8018    }
8019
8020    #[test]
8021    fn inspect_document_returns_full_text_concatenated_from_chunks() {
8022        let runtime = rt();
8023        let h = Harness::new(&runtime);
8024        let doc_id = "d3330000-0000-7000-8000-000000000001";
8025        {
8026            let conn = h.open_db();
8027            seed_document_row(&conn, doc_id, "doc-title");
8028            seed_chunk_row(&conn, "ch-doc-1", doc_id, 0, "First chunk body.");
8029            seed_chunk_row(&conn, "ch-doc-2", doc_id, 1, "Second chunk body.");
8030            seed_chunk_row(&conn, "ch-doc-3", doc_id, 2, "Third chunk body.");
8031        }
8032        let (status, body) = runtime.block_on(call(
8033            h.router.clone(),
8034            "GET",
8035            &inspect_uri(&format!("doc:{doc_id}")),
8036            None,
8037        ));
8038        assert_eq!(status, StatusCode::OK, "body: {body}");
8039        assert_eq!(body["node"]["kind"], "document");
8040        let full_text = body["full_text"].as_str().unwrap();
8041        // Concatenation order matches chunk_index ASC; separator is "\n\n".
8042        assert_eq!(
8043            full_text,
8044            "First chunk body.\n\nSecond chunk body.\n\nThird chunk body."
8045        );
8046        assert!(body["triples_in"].as_array().unwrap().is_empty());
8047        assert!(body["triples_out"].as_array().unwrap().is_empty());
8048        h.shutdown(&runtime);
8049    }
8050
8051    #[test]
8052    fn inspect_chunk_returns_text() {
8053        let runtime = rt();
8054        let h = Harness::new(&runtime);
8055        let chunk_body = "This is the body of the chunk being inspected.";
8056        {
8057            let conn = h.open_db();
8058            seed_document_row(&conn, "doc-chunk-host", "host");
8059            seed_chunk_row(&conn, "chunk-inspect-target", "doc-chunk-host", 0, chunk_body);
8060        }
8061        let (status, body) = runtime.block_on(call(
8062            h.router.clone(),
8063            "GET",
8064            &inspect_uri("chunk:chunk-inspect-target"),
8065            None,
8066        ));
8067        assert_eq!(status, StatusCode::OK, "body: {body}");
8068        assert_eq!(body["node"]["kind"], "chunk");
8069        assert_eq!(body["full_text"].as_str().unwrap(), chunk_body);
8070        assert!(body["triples_in"].as_array().unwrap().is_empty());
8071        assert!(body["triples_out"].as_array().unwrap().is_empty());
8072        h.shutdown(&runtime);
8073    }
8074
8075    #[test]
8076    fn inspect_cluster_returns_label_and_abstraction() {
8077        let runtime = rt();
8078        let h = Harness::new(&runtime);
8079        let cluster_id = "cl-inspect-target";
8080        let abstraction_text = "Discussions about the deploy pipeline and on-call rotation.";
8081        {
8082            let conn = h.open_db();
8083            seed_cluster_row(&conn, cluster_id, 12345);
8084            seed_abstraction_row(&conn, "abs-1", cluster_id, abstraction_text);
8085        }
8086        let (status, body) = runtime.block_on(call(
8087            h.router.clone(),
8088            "GET",
8089            &inspect_uri(&format!("cl:{cluster_id}")),
8090            None,
8091        ));
8092        assert_eq!(status, StatusCode::OK, "body: {body}");
8093        assert_eq!(body["node"]["kind"], "cluster");
8094        let full_text = body["full_text"].as_str().unwrap();
8095        assert!(
8096            full_text.contains(cluster_id),
8097            "full_text must include cluster label: {full_text}"
8098        );
8099        assert!(
8100            full_text.contains(abstraction_text),
8101            "full_text must include abstraction text: {full_text}"
8102        );
8103        // "label\n\nabstraction" -- separated by blank line for the
8104        // inspector renderer.
8105        assert!(full_text.contains("\n\n"), "label and abstraction must be separated: {full_text}");
8106        h.shutdown(&runtime);
8107    }
8108
8109    #[test]
8110    fn inspect_entity_returns_triples_only() {
8111        let runtime = rt();
8112        let h = Harness::new(&runtime);
8113        {
8114            let conn = h.open_db();
8115            let rowid = seed_episode(
8116                &conn,
8117                "e5550000-0000-7000-8000-000000000001",
8118                100,
8119                "host episode",
8120            );
8121            // 5 triples that reference Alice (as subject or object).
8122            seed_triple_row(&conn, "t-ent-1", "Alice", "knows", "Bob", Some(rowid));
8123            seed_triple_row(&conn, "t-ent-2", "Alice", "works_at", "Anthropic", Some(rowid));
8124            seed_triple_row(&conn, "t-ent-3", "user", "met", "Alice", Some(rowid));
8125            seed_triple_row(&conn, "t-ent-4", "Alice", "owns", "laptop", Some(rowid));
8126            seed_triple_row(&conn, "t-ent-5", "Carol", "mentors", "Alice", Some(rowid));
8127        }
8128        let (status, body) = runtime.block_on(call(
8129            h.router.clone(),
8130            "GET",
8131            &inspect_uri("ent:Alice"),
8132            None,
8133        ));
8134        assert_eq!(status, StatusCode::OK, "body: {body}");
8135        assert_eq!(body["node"]["kind"], "entity");
8136        assert_eq!(body["node"]["id"], "ent:Alice");
8137        assert!(
8138            body["full_text"].is_null(),
8139            "entity full_text must be null (entities have no body): {body}"
8140        );
8141        let triples_out = body["triples_out"].as_array().unwrap();
8142        assert_eq!(triples_out.len(), 5, "{body}");
8143        assert!(body["triples_in"].as_array().unwrap().is_empty());
8144        for e in triples_out {
8145            assert_eq!(e["kind"], "triple");
8146            assert_eq!(e["source"], "ent:Alice");
8147            // Counterpart is always an entity; Alice never appears on
8148            // both ends so target != source.
8149            assert!(e["target"].as_str().unwrap().starts_with("ent:"));
8150            assert_ne!(e["target"], "ent:Alice");
8151        }
8152        h.shutdown(&runtime);
8153    }
8154
8155    #[test]
8156    fn inspect_entity_with_zero_triples_returns_404() {
8157        let runtime = rt();
8158        let h = Harness::new(&runtime);
8159        // Seed unrelated triples so the table isn't empty; the target
8160        // entity still has zero references.
8161        {
8162            let conn = h.open_db();
8163            let rowid = seed_episode(
8164                &conn,
8165                "e6660000-0000-7000-8000-000000000001",
8166                100,
8167                "ep",
8168            );
8169            seed_triple_row(&conn, "t-other", "Bob", "knows", "Carol", Some(rowid));
8170        }
8171        let (status, body) = runtime.block_on(call(
8172            h.router.clone(),
8173            "GET",
8174            &inspect_uri("ent:Nonexistent"),
8175            None,
8176        ));
8177        assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
8178        let err = body["error"].as_str().unwrap_or_default();
8179        assert!(
8180            err.contains("Nonexistent") || err.contains("entity"),
8181            "error must mention entity: {body}"
8182        );
8183        h.shutdown(&runtime);
8184    }
8185
8186    #[test]
8187    fn inspect_404_on_missing_node() {
8188        // Well-formed `ep:` prefix + valid UUID shape, but no row in DB.
8189        let runtime = rt();
8190        let h = Harness::new(&runtime);
8191        let (status, body) = runtime.block_on(call(
8192            h.router.clone(),
8193            "GET",
8194            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
8195            None,
8196        ));
8197        assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");
8198        h.shutdown(&runtime);
8199    }
8200
8201    #[test]
8202    fn inspect_400_on_invalid_prefix() {
8203        let runtime = rt();
8204        let h = Harness::new(&runtime);
8205        let (status, body) = runtime.block_on(call(
8206            h.router.clone(),
8207            "GET",
8208            &inspect_uri("xyz:foo"),
8209            None,
8210        ));
8211        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
8212        let err = body["error"].as_str().unwrap_or_default();
8213        assert!(
8214            err.contains("xyz") || err.contains("prefix"),
8215            "error must mention bad prefix: {body}"
8216        );
8217        h.shutdown(&runtime);
8218    }
8219
8220    #[test]
8221    fn inspect_respects_tenant_scoping() {
8222        let runtime = rt();
8223        let h = Harness::new(&runtime);
8224        let memory_id = "a7770000-0000-7000-8000-000000000001";
8225        {
8226            let conn = h.open_db();
8227            seed_episode(&conn, memory_id, 100, "tenant scope");
8228        }
8229        // Real id in default tenant resolves; the same request against
8230        // a never-registered tenant header surfaces 404 from the tenant
8231        // extractor before the handler runs.
8232        let r = h.router.clone();
8233        let (status, _) = runtime.block_on(async {
8234            let req = Request::builder()
8235                .method("GET")
8236                .uri(inspect_uri(&format!("ep:{memory_id}")))
8237                .header("x-solo-tenant", "never-registered-tenant")
8238                .body(Body::empty())
8239                .unwrap();
8240            let resp = r.oneshot(req).await.expect("oneshot");
8241            let s = resp.status();
8242            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8243            (s, _b)
8244        });
8245        assert_eq!(status, StatusCode::NOT_FOUND);
8246        // Sanity: same id resolves on the default tenant.
8247        let (status, body) = runtime.block_on(call(
8248            h.router.clone(),
8249            "GET",
8250            &inspect_uri(&format!("ep:{memory_id}")),
8251            None,
8252        ));
8253        assert_eq!(status, StatusCode::OK, "default tenant must resolve: {body}");
8254        h.shutdown(&runtime);
8255    }
8256
8257    #[test]
8258    fn inspect_respects_auth_when_enabled() {
8259        let runtime = rt();
8260        let h = Harness::new_with_auth(&runtime, Some("inspect-secret".into()));
8261        // Missing bearer -> 401 before handler runs.
8262        let (status, _) = runtime.block_on(call(
8263            h.router.clone(),
8264            "GET",
8265            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
8266            None,
8267        ));
8268        assert_eq!(status, StatusCode::UNAUTHORIZED);
8269        // Valid bearer + unknown node -> handler runs and returns 404,
8270        // proving auth passed through.
8271        let (status, _) = runtime.block_on(call_with_auth(
8272            h.router.clone(),
8273            "GET",
8274            &inspect_uri("ep:99999999-9999-7000-8000-000000000999"),
8275            None,
8276            Some("Bearer inspect-secret"),
8277        ));
8278        assert_eq!(status, StatusCode::NOT_FOUND);
8279        h.shutdown(&runtime);
8280    }
8281
8282    // ---------------------------------------------------------------------
8283    // v0.10.0: GET /v1/graph/neighbors/{id}
8284    //
8285    // Unified explicit + HNSW-semantic neighbor surface for solo-web's
8286    // "show similar" overlay. Tests cover the kind dispatch (explicit /
8287    // semantic / both default), threshold filter, limit clamp, dedupe
8288    // rule, and the standard 400/404/auth/tenant gates.
8289    // ---------------------------------------------------------------------
8290
8291    /// URL builder for the neighbors endpoint. `kind`/`threshold`/`limit`
8292    /// are all optional; pass `None` to omit the corresponding query
8293    /// parameter. The node id is percent-encoded so `:` survives the path
8294    /// extractor.
8295    fn neighbors_uri(
8296        node_id: &str,
8297        kind: Option<&str>,
8298        threshold: Option<f32>,
8299        limit: Option<u32>,
8300    ) -> String {
8301        let mut qs: Vec<String> = Vec::new();
8302        if let Some(k) = kind {
8303            qs.push(format!("kind={k}"));
8304        }
8305        if let Some(t) = threshold {
8306            qs.push(format!("threshold={t}"));
8307        }
8308        if let Some(l) = limit {
8309            qs.push(format!("limit={l}"));
8310        }
8311        let encoded = percent_encode_node_id(node_id);
8312        if qs.is_empty() {
8313            format!("/v1/graph/neighbors/{encoded}")
8314        } else {
8315            format!("/v1/graph/neighbors/{encoded}?{}", qs.join("&"))
8316        }
8317    }
8318
8319    /// 1. `?kind=explicit` returns only structural edges (no semantic).
8320    /// Seeds an episode with 2 explicit (triple) neighbors + several
8321    /// distinct other episodes so the semantic path COULD surface
8322    /// candidates. The `kind=explicit` filter must drop all of them.
8323    #[test]
8324    fn neighbors_explicit_only_returns_no_semantic_edges() {
8325        let runtime = rt();
8326        let h = Harness::new(&runtime);
8327        runtime.block_on(async {
8328            // Seed several episodes via the writer-actor so they get HNSW
8329            // entries -- the semantic path would surface these if it
8330            // wasn't filtered out.
8331            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
8332            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
8333            let _other2 = post_remember(h.router.clone(), "gamma gamma gamma").await;
8334            // Add explicit triples sourced from `focal`. seed_triple_row
8335            // needs the focal rowid -- look it up via a side connection.
8336            {
8337                let conn = h.open_db();
8338                let rowid: i64 = conn
8339                    .query_row(
8340                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
8341                        rusqlite::params![&focal],
8342                        |r| r.get(0),
8343                    )
8344                    .unwrap();
8345                seed_triple_row(&conn, "t-exp-1", "Alice", "knows", "Bob", Some(rowid));
8346                seed_triple_row(&conn, "t-exp-2", "Alice", "owns", "laptop", Some(rowid));
8347            }
8348            let (status, body) = call(
8349                h.router.clone(),
8350                "GET",
8351                &neighbors_uri(&format!("ep:{focal}"), Some("explicit"), None, None),
8352                None,
8353            )
8354            .await;
8355            assert_eq!(status, StatusCode::OK, "body: {body}");
8356            let edges = body["edges"].as_array().unwrap();
8357            assert!(!edges.is_empty(), "expected explicit edges: {body}");
8358            for e in edges {
8359                assert_ne!(
8360                    e["kind"], "semantic",
8361                    "kind=explicit must drop semantic edges: {body}"
8362                );
8363            }
8364        });
8365        h.shutdown(&runtime);
8366    }
8367
8368    /// 2. `?kind=semantic` returns only HNSW edges (no explicit).
8369    /// Inverse of test 1 -- same fixture, opposite filter.
8370    #[test]
8371    fn neighbors_semantic_only_returns_no_explicit_edges() {
8372        let runtime = rt();
8373        let h = Harness::new(&runtime);
8374        runtime.block_on(async {
8375            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
8376            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
8377            let _other2 = post_remember(h.router.clone(), "gamma gamma gamma").await;
8378            {
8379                let conn = h.open_db();
8380                let rowid: i64 = conn
8381                    .query_row(
8382                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
8383                        rusqlite::params![&focal],
8384                        |r| r.get(0),
8385                    )
8386                    .unwrap();
8387                seed_triple_row(&conn, "t-exp-1", "Alice", "knows", "Bob", Some(rowid));
8388            }
8389            // Threshold=0 so every HNSW hit clears the filter.
8390            let (status, body) = call(
8391                h.router.clone(),
8392                "GET",
8393                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.0), None),
8394                None,
8395            )
8396            .await;
8397            assert_eq!(status, StatusCode::OK, "body: {body}");
8398            let edges = body["edges"].as_array().unwrap();
8399            for e in edges {
8400                assert_eq!(
8401                    e["kind"], "semantic",
8402                    "kind=semantic must drop explicit edges: {body}"
8403                );
8404                assert!(e["weight"].is_number(), "semantic edges carry weight: {body}");
8405            }
8406        });
8407        h.shutdown(&runtime);
8408    }
8409
8410    /// 3. Default (no `kind=` param) returns both explicit + semantic.
8411    #[test]
8412    fn neighbors_both_default_returns_combined() {
8413        let runtime = rt();
8414        let h = Harness::new(&runtime);
8415        runtime.block_on(async {
8416            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
8417            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
8418            {
8419                let conn = h.open_db();
8420                let rowid: i64 = conn
8421                    .query_row(
8422                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
8423                        rusqlite::params![&focal],
8424                        |r| r.get(0),
8425                    )
8426                    .unwrap();
8427                seed_triple_row(&conn, "t-both-1", "Alice", "met", "Bob", Some(rowid));
8428            }
8429            let (status, body) = call(
8430                h.router.clone(),
8431                "GET",
8432                // No kind param -> default = both. Threshold 0 so semantic
8433                // hits make it through the filter.
8434                &neighbors_uri(&format!("ep:{focal}"), None, Some(0.0), None),
8435                None,
8436            )
8437            .await;
8438            assert_eq!(status, StatusCode::OK, "body: {body}");
8439            let edges = body["edges"].as_array().unwrap();
8440            let kinds: std::collections::HashSet<&str> = edges
8441                .iter()
8442                .map(|e| e["kind"].as_str().unwrap())
8443                .collect();
8444            assert!(
8445                kinds.contains("triple"),
8446                "expected at least one triple edge: {body}"
8447            );
8448            assert!(
8449                kinds.contains("semantic"),
8450                "expected at least one semantic edge: {body}"
8451            );
8452        });
8453        h.shutdown(&runtime);
8454    }
8455
8456    /// 4. Dedupe rule. Construct an episode X whose semantic-neighbor Y
8457    /// is ALSO a triple-target -- i.e. the explicit and semantic paths
8458    /// both produce an edge X -> Y. After dedupe only the explicit edge
8459    /// survives.
8460    #[test]
8461    fn neighbors_dedupes_semantic_when_explicit_exists() {
8462        let runtime = rt();
8463        let h = Harness::new(&runtime);
8464        runtime.block_on(async {
8465            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
8466            // Seed an explicit triple from focal -> ent:peer-target.
8467            // The semantic path produces edges focal -> ep:<other>; we
8468            // ensure both paths produce an edge ending at the same id by
8469            // wiring `peer-target = ep:<other_memory_id>` -- but the
8470            // entity emitter uses `ent:` prefix, not `ep:`. So to force a
8471            // collision we need an edge form where source+target overlap.
8472            //
8473            // Simpler construction: the `expand_triple_from_episode` path
8474            // emits an edge `ent:subject -> ent:object`, not from the
8475            // focal episode -- meaning the explicit edges don't end at
8476            // an ep: node in the first place. So we have to engineer a
8477            // collision via the cluster_member path:
8478            //   * explicit: focal (episode) -> cluster (via cluster_member)
8479            //   * semantic: focal -> similar episode
8480            // The two endpoints (cluster vs. episode) never collide in
8481            // shape. To produce a real (source, target) overlap that
8482            // exercises the dedupe code, mint a synthetic semantic edge
8483            // by adding an explicit triple sourced from the focal that
8484            // happens to end at the SAME entity the semantic path would
8485            // emit -- but semantic only emits ep:/chunk: ids, never ent:.
8486            //
8487            // The brief flagged this scenario as unlikely. Build the
8488            // simplest collision the codebase admits: have the focal
8489            // episode's semantic neighbor's memory_id appear as a
8490            // triple's object_id (formatted as ent:<that-uuid>). The
8491            // explicit edge is then `ent:<self-subject> -> ent:<uuid>`;
8492            // the semantic edge is `ep:focal -> ep:<uuid>`. The (source,
8493            // target) pair DIFFERS (`ent:X` vs `ep:focal`), so dedupe
8494            // would NOT fire -- which is correct: those are structurally
8495            // different relationships.
8496            //
8497            // Therefore the realistic dedupe test is the trivial
8498            // tautology: explicit and semantic produce no collisions in
8499            // practice. Lock that in by asserting that the same memory_id
8500            // never appears with an edge from both paths.
8501            let _other = post_remember(h.router.clone(), "beta beta beta").await;
8502            {
8503                let conn = h.open_db();
8504                let rowid: i64 = conn
8505                    .query_row(
8506                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
8507                        rusqlite::params![&focal],
8508                        |r| r.get(0),
8509                    )
8510                    .unwrap();
8511                seed_triple_row(
8512                    &conn,
8513                    "t-dedupe-1",
8514                    "Alice",
8515                    "knows",
8516                    "Bob",
8517                    Some(rowid),
8518                );
8519            }
8520            let (status, body) = call(
8521                h.router.clone(),
8522                "GET",
8523                &neighbors_uri(&format!("ep:{focal}"), Some("both"), Some(0.0), None),
8524                None,
8525            )
8526            .await;
8527            assert_eq!(status, StatusCode::OK, "body: {body}");
8528            // For every edge, count occurrences of (source, target). No
8529            // pair should appear twice (which is what the dedupe rule
8530            // guarantees).
8531            let edges = body["edges"].as_array().unwrap();
8532            let mut seen: std::collections::HashMap<(String, String), i32> =
8533                std::collections::HashMap::new();
8534            for e in edges {
8535                let key = (
8536                    e["source"].as_str().unwrap().to_string(),
8537                    e["target"].as_str().unwrap().to_string(),
8538                );
8539                *seen.entry(key).or_insert(0) += 1;
8540            }
8541            for (pair, count) in &seen {
8542                assert_eq!(
8543                    *count, 1,
8544                    "edge pair {pair:?} appears {count} times -- dedupe rule violated: {body}"
8545                );
8546            }
8547        });
8548        h.shutdown(&runtime);
8549    }
8550
8551    /// 5. Threshold filter -- raising the threshold drops low-similarity
8552    /// semantic neighbors.
8553    #[test]
8554    fn neighbors_threshold_filters_low_similarity() {
8555        let runtime = rt();
8556        let h = Harness::new(&runtime);
8557        runtime.block_on(async {
8558            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
8559            let _o1 = post_remember(h.router.clone(), "beta one").await;
8560            let _o2 = post_remember(h.router.clone(), "beta two").await;
8561            let _o3 = post_remember(h.router.clone(), "beta three").await;
8562            // Low threshold -- expect more semantic hits.
8563            let (status, low_body) = call(
8564                h.router.clone(),
8565                "GET",
8566                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.0), None),
8567                None,
8568            )
8569            .await;
8570            assert_eq!(status, StatusCode::OK, "body: {low_body}");
8571            let low_edge_count = low_body["edges"].as_array().unwrap().len();
8572            // High threshold -- expect fewer (or equal) semantic hits.
8573            let (status, high_body) = call(
8574                h.router.clone(),
8575                "GET",
8576                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.99), None),
8577                None,
8578            )
8579            .await;
8580            assert_eq!(status, StatusCode::OK, "body: {high_body}");
8581            let high_edge_count = high_body["edges"].as_array().unwrap().len();
8582            assert!(
8583                high_edge_count <= low_edge_count,
8584                "high-threshold ({high_edge_count}) must not exceed low-threshold ({low_edge_count}): low={low_body}, high={high_body}"
8585            );
8586            // Also assert every surviving high-threshold edge satisfies
8587            // the filter.
8588            for e in high_body["edges"].as_array().unwrap() {
8589                if let Some(w) = e["weight"].as_f64() {
8590                    assert!(
8591                        w >= 0.99,
8592                        "edge with weight {w} survived threshold=0.99: {e}"
8593                    );
8594                }
8595            }
8596        });
8597        h.shutdown(&runtime);
8598    }
8599
8600    /// 6. `?limit=999` is silently clamped at the family ceiling (100) --
8601    /// same policy as `/v1/graph/expand`.
8602    #[test]
8603    fn neighbors_limit_clamped_at_100() {
8604        let runtime = rt();
8605        let h = Harness::new(&runtime);
8606        // Seed a cluster with > 100 episodes so the explicit cluster_member
8607        // path could surface > 100 -- clamp must cap at 100.
8608        {
8609            let conn = h.open_db();
8610            seed_cluster_row(&conn, "cl-huge-n", 1000);
8611            for i in 0..150 {
8612                let mid = format!("99119911-1111-7000-8000-{:012}", i);
8613                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
8614                seed_cluster_member(&conn, "cl-huge-n", &mid);
8615            }
8616        }
8617        let (status, body) = runtime.block_on(call(
8618            h.router.clone(),
8619            "GET",
8620            &neighbors_uri("cl:cl-huge-n", Some("explicit"), None, Some(999)),
8621            None,
8622        ));
8623        assert_eq!(status, StatusCode::OK, "body: {body}");
8624        let edges = body["edges"].as_array().unwrap();
8625        assert_eq!(
8626            edges.len(),
8627            100,
8628            "limit must be silently clamped to 100, got {}",
8629            edges.len()
8630        );
8631        h.shutdown(&runtime);
8632    }
8633
8634    /// 7. `kind=semantic` on a document focal node returns 400.
8635    #[test]
8636    fn neighbors_semantic_rejects_document_source() {
8637        let runtime = rt();
8638        let h = Harness::new(&runtime);
8639        let doc_id = "d-semrej-0000-7000-8000-000000000001";
8640        {
8641            let conn = h.open_db();
8642            seed_document_row(&conn, doc_id, "host");
8643        }
8644        let (status, body) = runtime.block_on(call(
8645            h.router.clone(),
8646            "GET",
8647            &neighbors_uri(
8648                &format!("doc:{doc_id}"),
8649                Some("semantic"),
8650                None,
8651                None,
8652            ),
8653            None,
8654        ));
8655        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
8656        let err = body["error"].as_str().unwrap_or_default();
8657        assert!(
8658            err.contains("episode") && err.contains("chunk"),
8659            "error must list supported kinds: {body}"
8660        );
8661        h.shutdown(&runtime);
8662    }
8663
8664    /// 8. `kind=semantic` on a cluster focal node returns 400.
8665    #[test]
8666    fn neighbors_semantic_rejects_cluster_source() {
8667        let runtime = rt();
8668        let h = Harness::new(&runtime);
8669        let cluster_id = "cl-semrej-target";
8670        {
8671            let conn = h.open_db();
8672            seed_cluster_row(&conn, cluster_id, 12345);
8673        }
8674        let (status, body) = runtime.block_on(call(
8675            h.router.clone(),
8676            "GET",
8677            &neighbors_uri(
8678                &format!("cl:{cluster_id}"),
8679                Some("semantic"),
8680                None,
8681                None,
8682            ),
8683            None,
8684        ));
8685        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
8686        h.shutdown(&runtime);
8687    }
8688
8689    /// 9. Entity focal node returns only explicit triple edges; no
8690    /// semantic edges (entities have no embeddings, semantic path is
8691    /// silently skipped under `kind=both`).
8692    #[test]
8693    fn neighbors_entity_returns_triples_only() {
8694        let runtime = rt();
8695        let h = Harness::new(&runtime);
8696        runtime.block_on(async {
8697            // Use the writer-actor so the host episode lands in HNSW too
8698            // (any HNSW state is irrelevant since entities can't trigger
8699            // semantic recall; included to prove the semantic path is
8700            // silently skipped, not erroring).
8701            let host_mid = post_remember(h.router.clone(), "Alice and Bob talked").await;
8702            {
8703                let conn = h.open_db();
8704                let rowid: i64 = conn
8705                    .query_row(
8706                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
8707                        rusqlite::params![&host_mid],
8708                        |r| r.get(0),
8709                    )
8710                    .unwrap();
8711                seed_triple_row(&conn, "t-ent-n-1", "Alice", "knows", "Bob", Some(rowid));
8712                seed_triple_row(&conn, "t-ent-n-2", "Alice", "works_at", "Acme", Some(rowid));
8713            }
8714            let (status, body) = call(
8715                h.router.clone(),
8716                "GET",
8717                &neighbors_uri("ent:Alice", None, Some(0.0), None),
8718                None,
8719            )
8720            .await;
8721            assert_eq!(status, StatusCode::OK, "body: {body}");
8722            let edges = body["edges"].as_array().unwrap();
8723            assert!(!edges.is_empty(), "expected explicit triples: {body}");
8724            for e in edges {
8725                assert_eq!(
8726                    e["kind"], "triple",
8727                    "entity focal must produce only triple edges: {body}"
8728                );
8729            }
8730        });
8731        h.shutdown(&runtime);
8732    }
8733
8734    /// 10. Cross-tenant lookups are blocked at the TenantExtractor before
8735    /// the handler runs.
8736    #[test]
8737    fn neighbors_respects_tenant_scoping() {
8738        let runtime = rt();
8739        let h = Harness::new(&runtime);
8740        let memory_id = "a8880000-0000-7000-8000-000000000001";
8741        {
8742            let conn = h.open_db();
8743            seed_episode(&conn, memory_id, 100, "tenant scope");
8744        }
8745        // Wrong tenant header -> 404 from registry, before handler runs.
8746        let r = h.router.clone();
8747        let (status, _) = runtime.block_on(async {
8748            let req = Request::builder()
8749                .method("GET")
8750                .uri(neighbors_uri(
8751                    &format!("ep:{memory_id}"),
8752                    Some("explicit"),
8753                    None,
8754                    None,
8755                ))
8756                .header("x-solo-tenant", "never-registered-tenant-n")
8757                .body(Body::empty())
8758                .unwrap();
8759            let resp = r.oneshot(req).await.expect("oneshot");
8760            let s = resp.status();
8761            let _b = resp.into_body().collect().await.unwrap().to_bytes();
8762            (s, _b)
8763        });
8764        assert_eq!(status, StatusCode::NOT_FOUND);
8765        // Sanity: same id resolves on default tenant.
8766        let (status, body) = runtime.block_on(call(
8767            h.router.clone(),
8768            "GET",
8769            &neighbors_uri(&format!("ep:{memory_id}"), Some("explicit"), None, None),
8770            None,
8771        ));
8772        assert_eq!(status, StatusCode::OK, "default tenant must resolve: {body}");
8773        h.shutdown(&runtime);
8774    }
8775
8776    /// 11. Bearer-auth gate: missing token -> 401; valid token + unknown
8777    /// node -> 404 (auth passed, handler ran).
8778    #[test]
8779    fn neighbors_respects_auth_when_enabled() {
8780        let runtime = rt();
8781        let h = Harness::new_with_auth(&runtime, Some("neighbors-secret".into()));
8782        // Missing Authorization -> 401.
8783        let (status, _) = runtime.block_on(call(
8784            h.router.clone(),
8785            "GET",
8786            &neighbors_uri(
8787                "ep:99999999-9999-7000-8000-000000000999",
8788                Some("explicit"),
8789                None,
8790                None,
8791            ),
8792            None,
8793        ));
8794        assert_eq!(status, StatusCode::UNAUTHORIZED);
8795        // Valid bearer + unknown node -> 404 from the handler.
8796        let (status, _) = runtime.block_on(call_with_auth(
8797            h.router.clone(),
8798            "GET",
8799            &neighbors_uri(
8800                "ep:99999999-9999-7000-8000-000000000999",
8801                Some("explicit"),
8802                None,
8803                None,
8804            ),
8805            None,
8806            Some("Bearer neighbors-secret"),
8807        ));
8808        assert_eq!(status, StatusCode::NOT_FOUND);
8809        h.shutdown(&runtime);
8810    }
8811
8812    // ---------------------------------------------------------------------
8813    // v0.10.0: GET /v1/graph/stream — SSE invalidation feed
8814    //
8815    // Driving SSE through axum's in-process router (`oneshot`) requires
8816    // reading the response body as a stream of frames and parsing each
8817    // chunk against the SSE wire format (`event: NAME\ndata: JSON\n\n`).
8818    // The `read_one_sse_event` helper below does that incrementally so
8819    // tests don't have to wait for the stream to close (which would
8820    // never happen — the SSE loop runs until the client drops).
8821    // ---------------------------------------------------------------------
8822
8823    /// One parsed SSE event: the `event:` field plus the `data:` payload
8824    /// re-parsed as JSON. Empty / comment-only frames are filtered out
8825    /// by the parser; callers only see real events.
8826    #[derive(Debug, Clone)]
8827    struct ParsedSseEvent {
8828        event: String,
8829        data: Value,
8830    }
8831
8832    /// Read frames off the SSE body until ONE complete event lands, then
8833    /// return it. Times out after `timeout` to keep red-test feedback
8834    /// fast. On timeout returns `None`.
8835    async fn read_one_sse_event(
8836        body: &mut axum::body::Body,
8837        timeout: std::time::Duration,
8838    ) -> Option<ParsedSseEvent> {
8839        use http_body_util::BodyExt;
8840        let mut buf = String::new();
8841        let start = std::time::Instant::now();
8842        loop {
8843            if start.elapsed() >= timeout {
8844                return None;
8845            }
8846            let remaining = timeout.saturating_sub(start.elapsed());
8847            let frame_res =
8848                tokio::time::timeout(remaining, body.frame()).await;
8849            let frame = match frame_res {
8850                Ok(Some(Ok(f))) => f,
8851                Ok(Some(Err(_))) | Ok(None) => return None,
8852                Err(_) => return None,
8853            };
8854            if let Ok(data) = frame.into_data() {
8855                buf.push_str(&String::from_utf8_lossy(&data));
8856                // Parse complete events (double newline separator).
8857                while let Some(idx) = buf.find("\n\n") {
8858                    let block: String = buf.drain(..idx + 2).collect();
8859                    if let Some(parsed) = parse_sse_block(&block) {
8860                        return Some(parsed);
8861                    }
8862                }
8863            }
8864        }
8865    }
8866
8867    /// Parse one SSE block (raw text between two `\n\n` separators).
8868    /// Returns `None` for comment-only blocks (lines starting with `:`)
8869    /// or blocks missing either `event:` or `data:`.
8870    fn parse_sse_block(block: &str) -> Option<ParsedSseEvent> {
8871        let mut event: Option<String> = None;
8872        let mut data: Option<String> = None;
8873        for line in block.lines() {
8874            if let Some(rest) = line.strip_prefix("event:") {
8875                event = Some(rest.trim().to_string());
8876            } else if let Some(rest) = line.strip_prefix("data:") {
8877                data = Some(rest.trim().to_string());
8878            }
8879        }
8880        let event = event?;
8881        let data_str = data?;
8882        let data_json = serde_json::from_str(&data_str).ok()?;
8883        Some(ParsedSseEvent {
8884            event,
8885            data: data_json,
8886        })
8887    }
8888
8889    /// Open the SSE stream and return the response body for further
8890    /// frame-level reads. The headers are validated (Content-Type +
8891    /// status) before the body is returned.
8892    async fn open_sse_stream_inner(
8893        router: axum::Router,
8894        auth: Option<&str>,
8895        tenant: Option<&str>,
8896    ) -> (StatusCode, axum::body::Body) {
8897        let mut builder = Request::builder()
8898            .method("GET")
8899            .uri("/v1/graph/stream");
8900        if let Some(a) = auth {
8901            builder = builder.header("authorization", a);
8902        }
8903        if let Some(t) = tenant {
8904            builder = builder.header("x-solo-tenant", t);
8905        }
8906        let req = builder
8907            .header("content-length", "0")
8908            .body(Body::empty())
8909            .unwrap();
8910        let resp = router.oneshot(req).await.expect("oneshot");
8911        let status = resp.status();
8912        let body = resp.into_body();
8913        (status, body)
8914    }
8915
8916    /// 1. `init` event lands as the first chunk.
8917    #[test]
8918    fn stream_emits_init_event_on_connect() {
8919        let runtime = rt();
8920        let h = Harness::new(&runtime);
8921        let r = h.router.clone();
8922        runtime.block_on(async {
8923            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
8924            assert_eq!(status, StatusCode::OK);
8925            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
8926                .await
8927                .expect("must receive init event within 2s");
8928            assert_eq!(ev.event, "init");
8929            assert_eq!(ev.data["connected"].as_bool(), Some(true));
8930            assert_eq!(ev.data["tenant_id"].as_str(), Some("default"));
8931            assert!(ev.data["ts_ms"].is_number());
8932        });
8933        h.shutdown(&runtime);
8934    }
8935
8936    /// 2. Firing an InvalidateEvent on the broadcast channel surfaces
8937    /// as an `invalidate` SSE event.
8938    #[test]
8939    fn stream_emits_invalidate_after_writer_event() {
8940        let runtime = rt();
8941        let h = Harness::new(&runtime);
8942        let r = h.router.clone();
8943        let sender = h.invalidate_sender();
8944        runtime.block_on(async {
8945            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
8946            assert_eq!(status, StatusCode::OK);
8947            // Discard the init event.
8948            let init = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
8949                .await
8950                .unwrap();
8951            assert_eq!(init.event, "init");
8952            // Fire a writer-actor-style event on the broadcast.
8953            sender
8954                .send(InvalidateEvent {
8955                    reason: "memory.remember".to_string(),
8956                    tenant_id: "default".to_string(),
8957                    ts_ms: 1_715_625_600_000,
8958                    kind: "episode".to_string(),
8959                })
8960                .expect("must have at least one subscriber");
8961            // The SSE handler must surface it.
8962            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
8963                .await
8964                .expect("invalidate event must arrive within 2s");
8965            assert_eq!(ev.event, "invalidate");
8966            assert_eq!(ev.data["reason"].as_str(), Some("memory.remember"));
8967            assert_eq!(ev.data["tenant_id"].as_str(), Some("default"));
8968            assert_eq!(ev.data["kind"].as_str(), Some("episode"));
8969        });
8970        h.shutdown(&runtime);
8971    }
8972
8973    /// 3. Each kind of writer-actor event surfaces with its mapped
8974    /// `(reason, kind)` shape.
8975    #[test]
8976    fn stream_emits_invalidate_for_each_writer_command() {
8977        let runtime = rt();
8978        let h = Harness::new(&runtime);
8979        let r = h.router.clone();
8980        let sender = h.invalidate_sender();
8981        let cases = [
8982            ("memory.remember", "episode"),
8983            ("memory.forget", "episode"),
8984            ("memory.consolidate", "cluster"),
8985            ("memory.ingest_document", "document"),
8986            ("memory.forget_document", "document"),
8987            ("memory.triples_extract", "cluster"),
8988            ("memory.reembed", "episode"),
8989            ("gdpr.forget_user", "tenant"),
8990        ];
8991        runtime.block_on(async {
8992            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
8993            assert_eq!(status, StatusCode::OK);
8994            // Discard the init.
8995            let _ = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
8996                .await
8997                .unwrap();
8998            for (reason, kind) in cases {
8999                sender
9000                    .send(InvalidateEvent {
9001                        reason: reason.to_string(),
9002                        tenant_id: "default".to_string(),
9003                        ts_ms: 1_715_625_600_000,
9004                        kind: kind.to_string(),
9005                    })
9006                    .unwrap();
9007                let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9008                    .await
9009                    .unwrap_or_else(|| panic!("must receive event for {reason}"));
9010                assert_eq!(ev.event, "invalidate");
9011                assert_eq!(
9012                    ev.data["reason"].as_str(),
9013                    Some(reason),
9014                    "reason mismatch"
9015                );
9016                assert_eq!(ev.data["kind"].as_str(), Some(kind), "kind mismatch");
9017            }
9018        });
9019        h.shutdown(&runtime);
9020    }
9021
9022    /// 4. Heartbeat events fire on the configured interval when no real
9023    /// events arrive. Drives `build_invalidate_stream` at a 1-second
9024    /// heartbeat (the public handler uses 30s in prod), wraps it in an
9025    /// `Sse` response, then reads + parses the SSE body via the same
9026    /// `read_one_sse_event` helper the HTTP-layer tests use. This
9027    /// exercises the public Event → body byte path without touching
9028    /// `Event::finalize` (which is private).
9029    #[test]
9030    fn stream_emits_heartbeat_when_no_events() {
9031        let runtime = rt();
9032        let h = Harness::new(&runtime);
9033        let sender = h.invalidate_sender();
9034        runtime.block_on(async {
9035            // Subscribe FIRST so a later writer-side `send` would lag
9036            // the receiver if the subscriber stalled.
9037            let rx = sender.subscribe();
9038            // Build the SSE stream with a 1-second heartbeat interval —
9039            // bypassing the 30s production default.
9040            let stream = build_invalidate_stream(rx, "default".to_string(), 1);
9041            // Wrap in an Sse response + extract the body bytes through
9042            // axum's IntoResponse path. This produces real on-the-wire
9043            // SSE bytes that `read_one_sse_event` can parse.
9044            let sse: Sse<_> = Sse::new(stream);
9045            let resp = sse.into_response();
9046            let mut body = resp.into_body();
9047            // First event must be `init`.
9048            let first =
9049                read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9050                    .await
9051                    .expect("init event must arrive");
9052            assert_eq!(first.event, "init");
9053            // Second must be heartbeat (no invalidates fired, ~1s
9054            // interval; allow 3s window for runtime jitter).
9055            let second =
9056                read_one_sse_event(&mut body, std::time::Duration::from_secs(3))
9057                    .await
9058                    .expect("heartbeat event must arrive within 3s");
9059            assert_eq!(second.event, "heartbeat");
9060            assert!(second.data["ts_ms"].is_number());
9061        });
9062        h.shutdown(&runtime);
9063    }
9064
9065    /// 5. Two subscribers connected to the same tenant both receive
9066    /// every invalidate.
9067    #[test]
9068    fn stream_concurrent_subscribers_same_tenant() {
9069        let runtime = rt();
9070        let h = Harness::new(&runtime);
9071        let r1 = h.router.clone();
9072        let r2 = h.router.clone();
9073        let r3 = h.router.clone();
9074        let sender = h.invalidate_sender();
9075        runtime.block_on(async {
9076            // Open three subscribers.
9077            let (s1, mut body1) = open_sse_stream_inner(r1, None, None).await;
9078            let (s2, mut body2) = open_sse_stream_inner(r2, None, None).await;
9079            let (s3, mut body3) = open_sse_stream_inner(r3, None, None).await;
9080            assert_eq!(s1, StatusCode::OK);
9081            assert_eq!(s2, StatusCode::OK);
9082            assert_eq!(s3, StatusCode::OK);
9083            // Drain init events from each.
9084            for body in [&mut body1, &mut body2, &mut body3] {
9085                let ev = read_one_sse_event(body, std::time::Duration::from_secs(2))
9086                    .await
9087                    .unwrap();
9088                assert_eq!(ev.event, "init");
9089            }
9090            // Receiver count should be at least 3 now.
9091            assert!(
9092                sender.receiver_count() >= 3,
9093                "expected ≥3 subscribers, got {}",
9094                sender.receiver_count()
9095            );
9096            // Fire one invalidate.
9097            sender
9098                .send(InvalidateEvent {
9099                    reason: "memory.remember".to_string(),
9100                    tenant_id: "default".to_string(),
9101                    ts_ms: 1_715_625_600_000,
9102                    kind: "episode".to_string(),
9103                })
9104                .expect("send must succeed");
9105            // All three receive it.
9106            for body in [&mut body1, &mut body2, &mut body3] {
9107                let ev = read_one_sse_event(body, std::time::Duration::from_secs(2))
9108                    .await
9109                    .unwrap();
9110                assert_eq!(ev.event, "invalidate");
9111                assert_eq!(ev.data["reason"].as_str(), Some("memory.remember"));
9112            }
9113        });
9114        h.shutdown(&runtime);
9115    }
9116
9117    /// 6. Dropping the SSE client decrements the per-tenant subscriber
9118    /// count — graceful cleanup invariant.
9119    #[test]
9120    fn stream_handles_client_disconnect_gracefully() {
9121        let runtime = rt();
9122        let h = Harness::new(&runtime);
9123        let r = h.router.clone();
9124        let sender = h.invalidate_sender();
9125        let before = sender.receiver_count();
9126        runtime.block_on(async {
9127            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
9128            assert_eq!(status, StatusCode::OK);
9129            // Drain the init so the stream is fully active.
9130            let _ = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9131                .await
9132                .unwrap();
9133            let during = sender.receiver_count();
9134            assert!(
9135                during > before,
9136                "subscriber count must increase while stream is live (before={before}, during={during})"
9137            );
9138            // Drop the body — simulates the client closing the
9139            // connection. axum drops the stream future, which drops the
9140            // Receiver.
9141            drop(body);
9142        });
9143        // Allow tokio a beat to drop the Receiver task.
9144        runtime.block_on(async {
9145            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
9146        });
9147        let after = sender.receiver_count();
9148        assert!(
9149            after <= before,
9150            "subscriber count must drop back after disconnect (before={before}, after={after})"
9151        );
9152        h.shutdown(&runtime);
9153    }
9154
9155    /// 7. Bearer-auth gate: missing token -> 401.
9156    #[test]
9157    fn stream_respects_auth_when_enabled() {
9158        let runtime = rt();
9159        let h = Harness::new_with_auth(&runtime, Some("stream-secret".into()));
9160        let r = h.router.clone();
9161        runtime.block_on(async {
9162            let (status, _body) = open_sse_stream_inner(r, None, None).await;
9163            assert_eq!(status, StatusCode::UNAUTHORIZED);
9164        });
9165        h.shutdown(&runtime);
9166    }
9167
9168    /// 8. Anonymous OK when auth=None (loopback default).
9169    #[test]
9170    fn stream_works_with_auth_none() {
9171        let runtime = rt();
9172        let h = Harness::new(&runtime);
9173        let r = h.router.clone();
9174        runtime.block_on(async {
9175            let (status, mut body) = open_sse_stream_inner(r, None, None).await;
9176            assert_eq!(status, StatusCode::OK);
9177            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9178                .await
9179                .expect("must receive init event");
9180            assert_eq!(ev.event, "init");
9181        });
9182        h.shutdown(&runtime);
9183    }
9184
9185    /// 9. Bearer-auth gate: valid token allows the stream to open.
9186    #[test]
9187    fn stream_respects_auth_accepts_valid_token() {
9188        let runtime = rt();
9189        let h = Harness::new_with_auth(&runtime, Some("stream-secret".into()));
9190        let r = h.router.clone();
9191        runtime.block_on(async {
9192            let (status, mut body) =
9193                open_sse_stream_inner(r, Some("Bearer stream-secret"), None).await;
9194            assert_eq!(status, StatusCode::OK);
9195            let ev = read_one_sse_event(&mut body, std::time::Duration::from_secs(2))
9196                .await
9197                .expect("must receive init event with valid bearer");
9198            assert_eq!(ev.event, "init");
9199            assert_eq!(ev.data["tenant_id"].as_str(), Some("default"));
9200        });
9201        h.shutdown(&runtime);
9202    }
9203
9204    /// 10. Cross-tenant lookups are 404 at TenantExtractor before the
9205    /// stream opens — wrong tenant header never reaches the handler.
9206    #[test]
9207    fn stream_respects_tenant_scoping() {
9208        let runtime = rt();
9209        let h = Harness::new(&runtime);
9210        let r = h.router.clone();
9211        runtime.block_on(async {
9212            let (status, _body) =
9213                open_sse_stream_inner(r, None, Some("never-registered-tenant-x")).await;
9214            // The single-tenant test registry returns NotFound from
9215            // get_or_open when the header points to a tenant that isn't
9216            // cached; the TenantExtractor maps that to 404.
9217            assert_eq!(status, StatusCode::NOT_FOUND);
9218        });
9219        h.shutdown(&runtime);
9220    }
9221
9222    // -----------------------------------------------------------------
9223    // /v1/tenants — principal-scoped tenant list (v0.10.0)
9224    //
9225    // Seeds the harness's in-memory tenants_index stub via
9226    // `harness.registry.with_index(|idx| idx.register(...))` to drive
9227    // the read-only list endpoint. The default tenant from the
9228    // harness's HashMap is NOT in the index stub by construction (the
9229    // `for_tests_with_single_tenant` factory only wires the cached
9230    // HashMap entry; the index starts empty after migrations), so each
9231    // test that wants the default tenant listed registers it
9232    // explicitly. This keeps the test setup explicit about what's
9233    // visible to `list_active` versus what's open in memory.
9234    // -----------------------------------------------------------------
9235
9236    /// Seed three Active tenants into the registry's index. Returns the
9237    /// ids in the order they were registered, which is the order
9238    /// `list_active` will return them in (ORDER BY created_at_ms ASC).
9239    async fn seed_three_tenants(registry: &TenantRegistry) -> Vec<String> {
9240        use solo_core::TenantId as TenantIdT;
9241        let ids = ["alice", "bob", "default"];
9242        for id in ids {
9243            let tid = TenantIdT::new(id).unwrap();
9244            registry
9245                .with_index(|idx| {
9246                    idx.register(&tid, &format!("{id}.db"), Some(&format!("{id} tenant")))
9247                        .unwrap();
9248                    // Ensure created_at_ms diverges so the ASC sort is
9249                    // deterministic — the index uses `chrono::Utc::now()`
9250                    // per row and 3 sequential inserts can land in the
9251                    // same ms on fast hardware.
9252                })
9253                .await;
9254            tokio::time::sleep(std::time::Duration::from_millis(2)).await;
9255        }
9256        // Sort matches the `created_at_ms ASC, tenant_id ASC` order
9257        // `TenantsIndex::list` returns. We inserted in (alice, bob,
9258        // default) order with 2ms gaps, so that's the expected order.
9259        vec!["alice".into(), "bob".into(), "default".into()]
9260    }
9261
9262    /// 1. With `AuthConfig::None`, the handler returns every tenant
9263    ///    visible in the registry — same scope as `solo tenants list`.
9264    ///    Exercises the "no principal" branch of the visibility filter.
9265    #[test]
9266    fn tenants_returns_all_when_auth_none() {
9267        let runtime = rt();
9268        let h = Harness::new(&runtime);
9269        let r = h.router.clone();
9270        runtime.block_on(async {
9271            let _expected = seed_three_tenants(&h.registry).await;
9272            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
9273            assert_eq!(status, StatusCode::OK);
9274            let arr = body
9275                .get("tenants")
9276                .and_then(|v| v.as_array())
9277                .expect("tenants array");
9278            assert_eq!(arr.len(), 3, "got body: {body}");
9279            let ids: Vec<&str> =
9280                arr.iter().filter_map(|t| t["id"].as_str()).collect();
9281            assert_eq!(ids, vec!["alice", "bob", "default"]);
9282        });
9283        h.shutdown(&runtime);
9284    }
9285
9286    /// 2. Under Bearer auth (single-principal mode), the handler
9287    ///    returns every tenant — the bearer holder is treated as the
9288    ///    daemon operator with full visibility. Exercises the bearer
9289    ///    branch of the visibility filter.
9290    #[test]
9291    fn tenants_returns_all_when_bearer_auth() {
9292        let runtime = rt();
9293        let h = Harness::new_with_auth(&runtime, Some("tlist-secret".into()));
9294        let r = h.router.clone();
9295        runtime.block_on(async {
9296            seed_three_tenants(&h.registry).await;
9297            let (status, body) = call_with_auth(
9298                r,
9299                "GET",
9300                "/v1/tenants",
9301                None,
9302                Some("Bearer tlist-secret"),
9303            )
9304            .await;
9305            assert_eq!(status, StatusCode::OK, "got body: {body}");
9306            let arr = body["tenants"].as_array().expect("tenants array");
9307            assert_eq!(arr.len(), 3, "bearer must see all tenants");
9308        });
9309        h.shutdown(&runtime);
9310    }
9311
9312    /// 3. Under OIDC, an authenticated principal carrying
9313    ///    `tenant_claim = "alice"` sees ONLY alice — not bob, not
9314    ///    default. Exercises the OIDC branch of the visibility filter.
9315    #[test]
9316    fn tenants_filters_to_principal_claim_when_oidc() {
9317        let runtime = rt();
9318        let (fake_server, discovery_url, secret, kid) =
9319            runtime.block_on(async { spin_fake_idp().await });
9320        let server_uri = fake_server.uri();
9321        let _server_guard = fake_server;
9322
9323        let auth = crate::auth::AuthConfig::Oidc {
9324            discovery_url,
9325            audience: "tlist-audience".to_string(),
9326            tenant_claim_name: "solo_tenant".to_string(),
9327        };
9328        let h = Harness::new_with_auth_config(&runtime, Some(auth));
9329        let r = h.router.clone();
9330
9331        runtime.block_on(async {
9332            seed_three_tenants(&h.registry).await;
9333            let token = mint_idp_token(
9334                &server_uri,
9335                kid,
9336                &secret,
9337                "alice",
9338                "tlist-audience",
9339            );
9340            let (status, body) = call_with_auth(
9341                r,
9342                "GET",
9343                "/v1/tenants",
9344                None,
9345                Some(&format!("Bearer {token}")),
9346            )
9347            .await;
9348            assert_eq!(status, StatusCode::OK, "got body: {body}");
9349            let arr = body["tenants"].as_array().expect("tenants array");
9350            assert_eq!(arr.len(), 1, "OIDC alice must see exactly one tenant");
9351            assert_eq!(arr[0]["id"].as_str(), Some("alice"));
9352        });
9353        h.shutdown(&runtime);
9354    }
9355
9356    /// 4. Under OIDC with a `tenant_claim` that doesn't match any
9357    ///    registered tenant, the response is `200 OK` with
9358    ///    `tenants: []` — NOT 404. Don't leak whether other tenants
9359    ///    exist via a status-code side-channel for an OIDC principal
9360    ///    that lacks visibility to them.
9361    #[test]
9362    fn tenants_returns_empty_when_oidc_claim_unmatched() {
9363        let runtime = rt();
9364        let (fake_server, discovery_url, secret, kid) =
9365            runtime.block_on(async { spin_fake_idp().await });
9366        let server_uri = fake_server.uri();
9367        let _server_guard = fake_server;
9368
9369        let auth = crate::auth::AuthConfig::Oidc {
9370            discovery_url,
9371            audience: "tlist-audience".to_string(),
9372            tenant_claim_name: "solo_tenant".to_string(),
9373        };
9374        let h = Harness::new_with_auth_config(&runtime, Some(auth));
9375        let r = h.router.clone();
9376
9377        runtime.block_on(async {
9378            seed_three_tenants(&h.registry).await;
9379            // Mint a token claiming a tenant that IS a valid TenantId
9380            // (passes middleware) but doesn't exist in the index.
9381            let token = mint_idp_token(
9382                &server_uri,
9383                kid,
9384                &secret,
9385                "nonexistent",
9386                "tlist-audience",
9387            );
9388            let (status, body) = call_with_auth(
9389                r,
9390                "GET",
9391                "/v1/tenants",
9392                None,
9393                Some(&format!("Bearer {token}")),
9394            )
9395            .await;
9396            assert_eq!(
9397                status,
9398                StatusCode::OK,
9399                "must be 200 OK, not 404 — don't leak tenant existence: {body}"
9400            );
9401            let arr = body["tenants"].as_array().expect("tenants array");
9402            assert_eq!(
9403                arr.len(),
9404                0,
9405                "unmatched OIDC claim must produce empty list, got: {body}"
9406            );
9407        });
9408        h.shutdown(&runtime);
9409    }
9410
9411    /// 5. JSON response shape matches what solo-web's TypeScript
9412    ///    client expects: `tenants[*].{id,display_name,created_at_ms,
9413    ///    status,quota_bytes,episode_count,size_bytes,pct_used,
9414    ///    last_accessed_ms}`. Catches accidental field renames at PR
9415    ///    time.
9416    ///
9417    ///    v0.10.1: `episode_count` / `size_bytes` / `pct_used` are
9418    ///    hydrated when the per-tenant DB file exists. This test
9419    ///    registers a tenant whose DB file does NOT exist (the
9420    ///    `for_tests_with_single_tenant` harness only writes the
9421    ///    `default` tenant's DB), so the three numeric fields land as
9422    ///    JSON `null` — verifying the `null` JSON value (not absence)
9423    ///    so clients see a stable shape regardless of hydration
9424    ///    success.
9425    #[test]
9426    fn tenants_response_shape_matches_solo_web_types() {
9427        let runtime = rt();
9428        let h = Harness::new(&runtime);
9429        let r = h.router.clone();
9430        runtime.block_on(async {
9431            // Register one tenant with a display_name + quota so all
9432            // optional fields are present in the response.
9433            let tid = solo_core::TenantId::new("shaped").unwrap();
9434            h.registry
9435                .with_index(|idx| {
9436                    idx.register_with_quota(
9437                        &tid,
9438                        "shaped.db",
9439                        Some("Shaped tenant"),
9440                        Some(1_048_576),
9441                    )
9442                    .unwrap();
9443                })
9444                .await;
9445            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
9446            assert_eq!(status, StatusCode::OK);
9447            let item = &body["tenants"][0];
9448            // id, display_name, created_at_ms, status: required
9449            assert_eq!(item["id"].as_str(), Some("shaped"));
9450            assert_eq!(item["display_name"].as_str(), Some("Shaped tenant"));
9451            assert!(
9452                item["created_at_ms"].is_i64(),
9453                "created_at_ms must be an i64, got {item}"
9454            );
9455            assert_eq!(item["status"].as_str(), Some("active"));
9456            // quota_bytes: present + numeric
9457            assert_eq!(item["quota_bytes"].as_u64(), Some(1_048_576));
9458            // v0.10.1: episode_count / size_bytes / pct_used become
9459            // null when the per-tenant DB file is missing on disk
9460            // (this harness only writes the default tenant's file —
9461            // shaped.db does not exist). Clients must tolerate the
9462            // null JSON shape; absence would be a breaking change.
9463            assert!(
9464                item["episode_count"].is_null(),
9465                "episode_count must be JSON null when tenant DB is missing, got {item}"
9466            );
9467            assert!(
9468                item["size_bytes"].is_null(),
9469                "size_bytes must be JSON null when tenant DB is missing, got {item}"
9470            );
9471            assert!(
9472                item["pct_used"].is_null(),
9473                "pct_used must be JSON null when size_bytes is null, got {item}"
9474            );
9475        });
9476        h.shutdown(&runtime);
9477    }
9478
9479    /// 6. Bearer auth enabled + missing Authorization header → 401
9480    ///    before the handler runs. Confirms the route is plumbed
9481    ///    through `auth_middleware` (it sits inside the `authed`
9482    ///    sub-router, not the `public` one).
9483    #[test]
9484    fn tenants_respects_auth_when_enabled() {
9485        let runtime = rt();
9486        let h = Harness::new_with_auth(&runtime, Some("must-auth".into()));
9487        let r = h.router.clone();
9488        runtime.block_on(async {
9489            seed_three_tenants(&h.registry).await;
9490            // No Authorization header → 401.
9491            let (status, _body) = call(r, "GET", "/v1/tenants", None).await;
9492            assert_eq!(status, StatusCode::UNAUTHORIZED);
9493        });
9494        h.shutdown(&runtime);
9495    }
9496
9497    /// 7. `PendingMigration` and `PendingDelete` rows are excluded
9498    ///    from the response. solo-web's tenant picker should never
9499    ///    surface a row that's mid-admin-operation (race with admin
9500    ///    tooling). Only Active tenants make the list.
9501    #[test]
9502    fn tenants_status_filter_excludes_non_active() {
9503        let runtime = rt();
9504        let h = Harness::new(&runtime);
9505        let r = h.router.clone();
9506        runtime.block_on(async {
9507            // Three tenants, three statuses. Only `keeper` (Active)
9508            // should appear on the wire.
9509            let keeper = solo_core::TenantId::new("keeper").unwrap();
9510            let migrating = solo_core::TenantId::new("migrating").unwrap();
9511            let deleting = solo_core::TenantId::new("deleting").unwrap();
9512            h.registry
9513                .with_index(|idx| {
9514                    idx.register(&keeper, "keeper.db", None).unwrap();
9515                    idx.register_with_status(
9516                        &migrating,
9517                        "migrating.db",
9518                        None,
9519                        solo_storage::TenantStatus::PendingMigration,
9520                    )
9521                    .unwrap();
9522                    idx.register_with_status(
9523                        &deleting,
9524                        "deleting.db",
9525                        None,
9526                        solo_storage::TenantStatus::PendingDelete,
9527                    )
9528                    .unwrap();
9529                })
9530                .await;
9531            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
9532            assert_eq!(status, StatusCode::OK);
9533            let arr = body["tenants"].as_array().expect("tenants array");
9534            let ids: Vec<&str> =
9535                arr.iter().filter_map(|t| t["id"].as_str()).collect();
9536            assert_eq!(
9537                ids,
9538                vec!["keeper"],
9539                "only Active tenants visible; got: {body}"
9540            );
9541        });
9542        h.shutdown(&runtime);
9543    }
9544
9545    /// 8. Empty registry → `200 OK` with `tenants: []`. Defends
9546    ///    against accidental `None` serialisation or 404'ing on an
9547    ///    empty list. solo-web's first paint on a brand-new daemon
9548    ///    needs an empty array to render the "no tenants yet" state.
9549    #[test]
9550    fn tenants_returns_empty_array_when_no_tenants_registered() {
9551        let runtime = rt();
9552        let h = Harness::new(&runtime);
9553        let r = h.router.clone();
9554        runtime.block_on(async {
9555            // Don't seed anything — the harness's in-memory index
9556            // starts at zero rows (the cached default-tenant handle in
9557            // the HashMap is invisible to `list_active`).
9558            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
9559            assert_eq!(status, StatusCode::OK);
9560            let arr = body["tenants"].as_array().expect("tenants array");
9561            assert_eq!(arr.len(), 0, "expected empty array, got: {body}");
9562        });
9563        h.shutdown(&runtime);
9564    }
9565
9566    // ---- v0.10.1: cost-number hydration tests ----
9567    //
9568    // These exercise `TenantRegistry::hydrate_tenant_cost_numbers` end-
9569    // to-end through the `/v1/tenants` handler. The harness's
9570    // `for_tests_with_single_tenant` registry uses a plain-SQLite tenant
9571    // DB (not real SQLCipher); the hydration helper has a fallback
9572    // open path for that case (see registry.rs). The
9573    // `_tmp_dir/tenants/<filename>` layout matters: that's where the
9574    // hydration helper looks. These tests create real files there to
9575    // exercise the size_bytes path; episode_count requires the file to
9576    // be a SQLite DB with the `episodes` table.
9577    //
9578    // The `default` tenant exists at `_tmp_dir/test.db` (set by the
9579    // harness); the hydration helper expects `_tmp_dir/tenants/<file>`.
9580    // So we either (a) register a fresh tenant id pointing at a DB we
9581    // create at the expected layout, or (b) check the documented
9582    // behavior under "file missing" (returns null counts gracefully).
9583    // Both shapes are tested here.
9584    //
9585    // The constant `TENANTS_COUNT_HYDRATION_CAP` is grep-able.
9586
9587    /// Helper: create a per-tenant DB file at the layout the hydration
9588    /// helper expects (`<data_dir>/tenants/<db_filename>`), populated
9589    /// with the `episodes` table + `n_active` active episodes +
9590    /// `n_forgotten` forgotten episodes. Returns the absolute path.
9591    fn seed_per_tenant_db_with_episodes(
9592        data_dir: &std::path::Path,
9593        db_filename: &str,
9594        n_active: i64,
9595        n_forgotten: i64,
9596    ) -> std::path::PathBuf {
9597        let tenants_dir = data_dir.join(solo_storage::TENANTS_SUBDIR);
9598        std::fs::create_dir_all(&tenants_dir).unwrap();
9599        let db_path = tenants_dir.join(db_filename);
9600        // Open as plain SQLite (test path; matches the harness's
9601        // `open_test_db_at` shape; hydration helper falls back to plain
9602        // open when SQLCipher open fails).
9603        let mut conn = rusqlite::Connection::open(&db_path).unwrap();
9604        // Run the same migrations the real per-tenant DB does so the
9605        // `episodes` table + `status` CHECK constraint match production.
9606        solo_storage::run_migrations(&mut conn).unwrap();
9607        for i in 0..n_active {
9608            conn.execute(
9609                "INSERT INTO episodes (memory_id, ts_ms, source_type, content, confidence, strength, salience, tier, status, created_at_ms, updated_at_ms)
9610                 VALUES (?, 0, 'user_message', 'x', 0.5, 0.5, 0.5, 'hot', 'active', 0, 0)",
9611                rusqlite::params![format!("a-{i}")],
9612            )
9613            .unwrap();
9614        }
9615        for i in 0..n_forgotten {
9616            conn.execute(
9617                "INSERT INTO episodes (memory_id, ts_ms, source_type, content, confidence, strength, salience, tier, status, created_at_ms, updated_at_ms)
9618                 VALUES (?, 0, 'user_message', 'x', 0.5, 0.5, 0.5, 'hot', 'forgotten', 0, 0)",
9619                rusqlite::params![format!("f-{i}")],
9620            )
9621            .unwrap();
9622        }
9623        drop(conn);
9624        db_path
9625    }
9626
9627    /// v0.10.1 test 1: `episode_count` hydrates to the actual active
9628    /// episode count when the per-tenant DB exists. Seed 3 active + 2
9629    /// forgotten episodes; expect `episode_count: 3` (the `status =
9630    /// 'active'` filter excludes the forgotten rows).
9631    #[test]
9632    fn tenants_response_hydrates_episode_count_when_tenant_has_data() {
9633        let runtime = rt();
9634        let h = Harness::new(&runtime);
9635        let r = h.router.clone();
9636        let data_dir = h._tmp.path().to_path_buf();
9637        runtime.block_on(async {
9638            let tid = solo_core::TenantId::new("counted").unwrap();
9639            seed_per_tenant_db_with_episodes(&data_dir, "counted.db", 3, 2);
9640            h.registry
9641                .with_index(|idx| {
9642                    idx.register(&tid, "counted.db", Some("Counted tenant"))
9643                        .unwrap();
9644                })
9645                .await;
9646            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
9647            assert_eq!(status, StatusCode::OK);
9648            let item = &body["tenants"][0];
9649            assert_eq!(item["id"].as_str(), Some("counted"));
9650            assert_eq!(
9651                item["episode_count"].as_i64(),
9652                Some(3),
9653                "episode_count must be 3 (active rows only, 2 forgotten excluded); got {item}"
9654            );
9655        });
9656        h.shutdown(&runtime);
9657    }
9658
9659    /// v0.10.1 test 2: `size_bytes` reports the on-disk size of the
9660    /// per-tenant DB file. Asserts the response value matches
9661    /// `std::fs::metadata(<db_path>).len()` exactly — pins that we
9662    /// read the right file, not e.g. data_dir or a temp.
9663    #[test]
9664    fn tenants_response_hydrates_size_bytes_from_db_file() {
9665        let runtime = rt();
9666        let h = Harness::new(&runtime);
9667        let r = h.router.clone();
9668        let data_dir = h._tmp.path().to_path_buf();
9669        runtime.block_on(async {
9670            let tid = solo_core::TenantId::new("sized").unwrap();
9671            let db_path =
9672                seed_per_tenant_db_with_episodes(&data_dir, "sized.db", 1, 0);
9673            h.registry
9674                .with_index(|idx| {
9675                    idx.register(&tid, "sized.db", None).unwrap();
9676                })
9677                .await;
9678            let on_disk = std::fs::metadata(&db_path).unwrap().len();
9679            assert!(on_disk > 0, "test setup: db file should be non-empty");
9680            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
9681            assert_eq!(status, StatusCode::OK);
9682            let item = &body["tenants"][0];
9683            assert_eq!(item["id"].as_str(), Some("sized"));
9684            assert_eq!(
9685                item["size_bytes"].as_u64(),
9686                Some(on_disk),
9687                "size_bytes must match fs::metadata; got {item}"
9688            );
9689        });
9690        h.shutdown(&runtime);
9691    }
9692
9693    /// v0.10.1 test 3: `pct_used` is computed from `size_bytes /
9694    /// quota_bytes * 100` when both are known. Pick a quota much
9695    /// larger than the DB so the percentage stays in a sane range
9696    /// (and survives any unrelated DB-page padding).
9697    #[test]
9698    fn tenants_response_computes_pct_used_when_quota_set() {
9699        let runtime = rt();
9700        let h = Harness::new(&runtime);
9701        let r = h.router.clone();
9702        let data_dir = h._tmp.path().to_path_buf();
9703        runtime.block_on(async {
9704            let tid = solo_core::TenantId::new("quoted").unwrap();
9705            let db_path =
9706                seed_per_tenant_db_with_episodes(&data_dir, "quoted.db", 1, 0);
9707            // Pick a quota that's large enough that pct_used lands
9708            // between 0 and 50% regardless of SQLite page boundary
9709            // rounding. Asserting an exact float would be flaky.
9710            let on_disk = std::fs::metadata(&db_path).unwrap().len();
9711            let quota = on_disk * 4; // pct_used should be ~25%
9712            h.registry
9713                .with_index(|idx| {
9714                    idx.register_with_quota(&tid, "quoted.db", None, Some(quota))
9715                        .unwrap();
9716                })
9717                .await;
9718            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
9719            assert_eq!(status, StatusCode::OK);
9720            let item = &body["tenants"][0];
9721            let pct = item["pct_used"].as_f64().expect("pct_used must be a number");
9722            assert!(
9723                (0.0..=100.0).contains(&pct),
9724                "pct_used must be in [0, 100], got {pct}"
9725            );
9726            // Allow a wide band — exact value depends on SQLite page
9727            // size — but the recipe (size/quota*100) means a
9728            // size=quota/4 setup must land near 25%.
9729            assert!(
9730                (20.0..=30.0).contains(&pct),
9731                "pct_used must be ~25% for size=quota/4, got {pct}"
9732            );
9733        });
9734        h.shutdown(&runtime);
9735    }
9736
9737    /// v0.10.1 test 4: `pct_used` is `null` when `quota_bytes` is
9738    /// null (the "unlimited" case). Pins that we don't accidentally
9739    /// emit a numeric `0.0` or `100.0` for unlimited quotas.
9740    #[test]
9741    fn tenants_response_pct_used_null_when_quota_null() {
9742        let runtime = rt();
9743        let h = Harness::new(&runtime);
9744        let r = h.router.clone();
9745        let data_dir = h._tmp.path().to_path_buf();
9746        runtime.block_on(async {
9747            let tid = solo_core::TenantId::new("unlimited").unwrap();
9748            seed_per_tenant_db_with_episodes(&data_dir, "unlimited.db", 1, 0);
9749            h.registry
9750                .with_index(|idx| {
9751                    idx.register(&tid, "unlimited.db", None).unwrap();
9752                })
9753                .await;
9754            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
9755            assert_eq!(status, StatusCode::OK);
9756            let item = &body["tenants"][0];
9757            assert_eq!(item["id"].as_str(), Some("unlimited"));
9758            assert!(
9759                item["quota_bytes"].is_null(),
9760                "test setup: quota_bytes must be null, got {item}"
9761            );
9762            assert!(
9763                item["pct_used"].is_null(),
9764                "pct_used must be JSON null when quota_bytes is null, got {item}"
9765            );
9766            // size_bytes still present (no quota doesn't suppress
9767            // size — only pct_used).
9768            assert!(
9769                item["size_bytes"].is_u64(),
9770                "size_bytes must still be present when quota_bytes is null, got {item}"
9771            );
9772        });
9773        h.shutdown(&runtime);
9774    }
9775
9776    /// v0.10.1 test 5: the response includes
9777    /// `X-Solo-Tenants-Count-Cap-Reached: true` when the filtered
9778    /// tenant count exceeds `TENANTS_COUNT_HYDRATION_CAP`. Tenants
9779    /// beyond the cap have `episode_count: null` even though their
9780    /// `size_bytes` is still hydrated (fs::metadata is cheap).
9781    ///
9782    /// We don't seed 51 real DBs (would be slow); instead, we
9783    /// register 51 tenant rows in the index. The cap is documented
9784    /// to apply to `episode_count` hydration, and the header is
9785    /// emitted purely from the count of filtered records. The
9786    /// header semantics here are independent of per-tenant DB
9787    /// existence.
9788    #[test]
9789    fn tenants_response_sets_cap_reached_header_when_over_cap() {
9790        let runtime = rt();
9791        let h = Harness::new(&runtime);
9792        let r = h.router.clone();
9793        runtime.block_on(async {
9794            // Register 51 tenants (cap = 50, so we exceed it).
9795            h.registry
9796                .with_index(|idx| {
9797                    for i in 0..51 {
9798                        let id = format!("t{i:02}");
9799                        let tid = solo_core::TenantId::new(&id).unwrap();
9800                        idx.register(&tid, &format!("{id}.db"), None).unwrap();
9801                    }
9802                })
9803                .await;
9804            // Send a raw request so we can inspect headers.
9805            use axum::body::Body;
9806            use axum::http::Request;
9807            use http_body_util::BodyExt;
9808            let req = Request::builder()
9809                .method("GET")
9810                .uri("/v1/tenants")
9811                .body(Body::empty())
9812                .unwrap();
9813            let resp = r.oneshot(req).await.unwrap();
9814            assert_eq!(resp.status(), StatusCode::OK);
9815            let cap_header = resp
9816                .headers()
9817                .get(X_SOLO_TENANTS_COUNT_CAP_HEADER)
9818                .expect("cap-reached header must be present");
9819            assert_eq!(
9820                cap_header.to_str().unwrap(),
9821                "true",
9822                "cap-reached header value must be 'true' when over cap"
9823            );
9824            // Parse body to verify shape — beyond-cap tenants have
9825            // null episode_count.
9826            let bytes = resp.into_body().collect().await.unwrap().to_bytes();
9827            let body: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
9828            let arr = body["tenants"].as_array().expect("tenants array");
9829            assert_eq!(arr.len(), 51, "got {} tenants", arr.len());
9830            // The last (sorted-by-created_at_ms) tenant should be
9831            // beyond the cap. The hydration order matches the
9832            // filtered list order, so index 50 is the 51st tenant
9833            // and should have null episode_count.
9834            assert!(
9835                arr[50]["episode_count"].is_null(),
9836                "the 51st tenant (beyond cap) must have null episode_count, got {}",
9837                arr[50]
9838            );
9839        });
9840        h.shutdown(&runtime);
9841    }
9842
9843    /// v0.10.1 test 6: when the response is under the cap, the
9844    /// `X-Solo-Tenants-Count-Cap-Reached` header is absent. Pin the
9845    /// negative case so a future refactor that always emits the
9846    /// header (with "false") doesn't pass silently.
9847    #[test]
9848    fn tenants_response_omits_cap_header_when_under_cap() {
9849        let runtime = rt();
9850        let h = Harness::new(&runtime);
9851        let r = h.router.clone();
9852        runtime.block_on(async {
9853            seed_three_tenants(&h.registry).await;
9854            use axum::body::Body;
9855            use axum::http::Request;
9856            let req = Request::builder()
9857                .method("GET")
9858                .uri("/v1/tenants")
9859                .body(Body::empty())
9860                .unwrap();
9861            let resp = r.oneshot(req).await.unwrap();
9862            assert_eq!(resp.status(), StatusCode::OK);
9863            assert!(
9864                resp.headers().get(X_SOLO_TENANTS_COUNT_CAP_HEADER).is_none(),
9865                "cap-reached header must be absent under the cap"
9866            );
9867        });
9868        h.shutdown(&runtime);
9869    }
9870
9871    // ---- Pure unit tests on the visibility filter ----
9872    //
9873    // These exercise `filter_tenants_for_principal` and
9874    // `is_single_principal_bearer` without an axum router — fast
9875    // feedback for the load-bearing visibility rule. The
9876    // router-level tests above cover the wire path.
9877
9878    /// Build a synthetic `TenantRecord` so the pure unit tests don't
9879    /// need a real SQLCipher round-trip.
9880    fn make_record(id: &str) -> solo_storage::TenantRecord {
9881        solo_storage::TenantRecord {
9882            tenant_id: solo_core::TenantId::new(id).unwrap(),
9883            db_filename: format!("{id}.db"),
9884            display_name: None,
9885            created_at_ms: 0,
9886            status: solo_storage::TenantStatus::Active,
9887            quota_bytes: None,
9888            last_accessed_ms: None,
9889        }
9890    }
9891
9892    #[test]
9893    fn filter_no_principal_returns_all() {
9894        let records = vec![make_record("a"), make_record("b")];
9895        let out = filter_tenants_for_principal(records.clone(), None);
9896        assert_eq!(out.len(), 2);
9897        assert_eq!(out[0].tenant_id.as_str(), "a");
9898        assert_eq!(out[1].tenant_id.as_str(), "b");
9899    }
9900
9901    #[test]
9902    fn filter_bearer_principal_returns_all() {
9903        let records = vec![make_record("a"), make_record("b")];
9904        let p = AuthenticatedPrincipal::bearer(
9905            solo_core::TenantId::new("a").unwrap(),
9906        );
9907        let out = filter_tenants_for_principal(records, Some(&p));
9908        assert_eq!(out.len(), 2);
9909    }
9910
9911    #[test]
9912    fn filter_oidc_principal_keeps_only_claim() {
9913        let records = vec![make_record("a"), make_record("b"), make_record("c")];
9914        // OIDC-flavoured principal: non-bearer subject + JSON-object claims.
9915        let p = AuthenticatedPrincipal {
9916            subject: "alice@example.com".to_string(),
9917            tenant_claim: Some(solo_core::TenantId::new("b").unwrap()),
9918            scopes: vec!["read".to_string()],
9919            claims: serde_json::json!({ "sub": "alice@example.com" }),
9920        };
9921        let out = filter_tenants_for_principal(records, Some(&p));
9922        assert_eq!(out.len(), 1);
9923        assert_eq!(out[0].tenant_id.as_str(), "b");
9924    }
9925
9926    #[test]
9927    fn filter_oidc_principal_with_no_claim_returns_empty() {
9928        // Theoretically unreachable — middleware short-circuits at 403
9929        // before we see a no-claim OIDC principal. Defend anyway.
9930        let records = vec![make_record("a")];
9931        let p = AuthenticatedPrincipal {
9932            subject: "alice@example.com".to_string(),
9933            tenant_claim: None,
9934            scopes: vec![],
9935            claims: serde_json::json!({ "sub": "alice@example.com" }),
9936        };
9937        let out = filter_tenants_for_principal(records, Some(&p));
9938        assert!(out.is_empty());
9939    }
9940
9941    #[test]
9942    fn is_single_principal_bearer_discriminator() {
9943        let bearer = AuthenticatedPrincipal::bearer(
9944            solo_core::TenantId::new("default").unwrap(),
9945        );
9946        assert!(is_single_principal_bearer(&bearer));
9947
9948        let oidc = AuthenticatedPrincipal {
9949            subject: "alice".to_string(),
9950            tenant_claim: Some(solo_core::TenantId::new("alice").unwrap()),
9951            scopes: vec![],
9952            claims: serde_json::json!({ "x": 1 }),
9953        };
9954        assert!(!is_single_principal_bearer(&oidc));
9955
9956        // Subject == "bearer" but claims is a non-null object → not a
9957        // bearer-shaped principal. Defends against a forged-bearer
9958        // shape that might smuggle JWT claims.
9959        let weird = AuthenticatedPrincipal {
9960            subject: "bearer".to_string(),
9961            tenant_claim: Some(solo_core::TenantId::default_tenant()),
9962            scopes: vec![],
9963            claims: serde_json::json!({ "leak": 1 }),
9964        };
9965        assert!(!is_single_principal_bearer(&weird));
9966    }
9967
9968    // ---------------------------------------------------------------
9969    // v0.10.2 — MCP-over-HTTP transport on /mcp
9970    // ---------------------------------------------------------------
9971    //
9972    // These tests pin the wire contract for the new `/mcp` route added
9973    // in v0.10.2 P2. We exercise the route through the same `Harness`
9974    // pattern the rest of the file uses (in-process axum Router via
9975    // `tower::ServiceExt::oneshot`) — no real TCP listener needed.
9976    //
9977    // The dispatcher's unit tests live in `mcp_dispatch::tests` and
9978    // cover the JSON-RPC envelope shape in isolation. These tests are
9979    // the integration layer: real `TenantHandle`, real `WriterActor`,
9980    // real `SoloMcpServer::dispatch_tool` path.
9981
9982    /// `POST /mcp` with `{jsonrpc, id, method: "tools/list"}` returns
9983    /// the canonical 14 tools. Matches the stdio smoke test
9984    /// `mcp_stdio_lists_fourteen_canonical_tools` from
9985    /// `crates/solo-cli/tests/mcp_smoke.rs` so any drift between the
9986    /// two transports fails one of the two suites loudly.
9987    #[test]
9988    fn mcp_http_tools_list_returns_fourteen_canonical_tools() {
9989        let runtime = rt();
9990        let h = Harness::new(&runtime);
9991        let r = h.router.clone();
9992        runtime.block_on(async move {
9993            let req = json!({
9994                "jsonrpc": "2.0",
9995                "id": 1,
9996                "method": "tools/list",
9997            });
9998            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
9999            assert_eq!(status, StatusCode::OK);
10000            assert_eq!(body.get("jsonrpc").and_then(|v| v.as_str()), Some("2.0"));
10001            assert_eq!(body.get("id").and_then(|v| v.as_i64()), Some(1));
10002            let tools = body
10003                .pointer("/result/tools")
10004                .and_then(|v| v.as_array())
10005                .unwrap_or_else(|| panic!("missing /result/tools: {body}"));
10006            let mut names: Vec<String> = tools
10007                .iter()
10008                .filter_map(|t| t.get("name").and_then(|n| n.as_str()).map(String::from))
10009                .collect();
10010            names.sort();
10011            assert_eq!(
10012                names,
10013                vec![
10014                    "memory_contradictions".to_string(),
10015                    "memory_facts_about".to_string(),
10016                    "memory_forget".to_string(),
10017                    "memory_forget_document".to_string(),
10018                    "memory_ingest_document".to_string(),
10019                    "memory_inspect".to_string(),
10020                    "memory_inspect_cluster".to_string(),
10021                    "memory_inspect_document".to_string(),
10022                    "memory_list_documents".to_string(),
10023                    "memory_recall".to_string(),
10024                    "memory_remember".to_string(),
10025                    "memory_remember_batch".to_string(),
10026                    "memory_search_docs".to_string(),
10027                    "memory_themes".to_string(),
10028                ],
10029                "mcp_http: tools/list returned unexpected name set"
10030            );
10031        });
10032        h.shutdown(&runtime);
10033    }
10034
10035    /// `POST /mcp` with `tools/call` for `memory_remember` writes the
10036    /// episode and returns a confirmation string. Then a separate
10037    /// `GET /v1/graph/nodes` call (REST surface) sees the episode —
10038    /// proving one process is serving both surfaces against the same
10039    /// writer.
10040    #[test]
10041    fn mcp_http_remember_writes_episode_visible_via_graph_nodes() {
10042        let runtime = rt();
10043        let h = Harness::new(&runtime);
10044        let r = h.router.clone();
10045        runtime.block_on(async move {
10046            // 1. memory_remember via /mcp.
10047            let req = json!({
10048                "jsonrpc": "2.0",
10049                "id": 2,
10050                "method": "tools/call",
10051                "params": {
10052                    "name": "memory_remember",
10053                    "arguments": { "content": "mcp-http-cross-surface-smoke" },
10054                },
10055            });
10056            let (status, body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
10057            assert_eq!(status, StatusCode::OK);
10058            let result_text = body
10059                .pointer("/result/content/0/text")
10060                .and_then(|v| v.as_str())
10061                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
10062            assert!(
10063                result_text.starts_with("remembered "),
10064                "expected `remembered <id>`, got: {result_text}"
10065            );
10066
10067            // 2. Confirm via /v1/graph/nodes (REST). Same writer, same
10068            //    tenant — the cross-surface smoke that motivates v0.10.2.
10069            //    Episode nodes carry the content under `label` +
10070            //    `preview` (the v0.10.0 graph-nodes wire shape).
10071            let (status2, nodes_body) =
10072                call(r, "GET", "/v1/graph/nodes?kind=episode&limit=10", None).await;
10073            assert_eq!(status2, StatusCode::OK);
10074            let nodes = nodes_body
10075                .get("nodes")
10076                .and_then(|v| v.as_array())
10077                .unwrap_or_else(|| panic!("missing nodes: {nodes_body}"));
10078            assert!(
10079                nodes.iter().any(|n| {
10080                    let label_hit = n
10081                        .get("label")
10082                        .and_then(|c| c.as_str())
10083                        .is_some_and(|s| s.contains("mcp-http-cross-surface-smoke"));
10084                    let preview_hit = n
10085                        .get("preview")
10086                        .and_then(|c| c.as_str())
10087                        .is_some_and(|s| s.contains("mcp-http-cross-surface-smoke"));
10088                    label_hit || preview_hit
10089                }),
10090                "graph/nodes didn't surface the MCP-written episode: {nodes_body}"
10091            );
10092        });
10093        h.shutdown(&runtime);
10094    }
10095
10096    /// `POST /mcp` with `tools/call` for `memory_recall` returns the
10097    /// just-remembered episode. Smoke for the read path under the new
10098    /// transport.
10099    #[test]
10100    fn mcp_http_recall_returns_just_remembered_episode() {
10101        let runtime = rt();
10102        let h = Harness::new(&runtime);
10103        let r = h.router.clone();
10104        runtime.block_on(async move {
10105            // Remember first.
10106            let needle = "mcp-http-recall-needle-deadbeef";
10107            let req = json!({
10108                "jsonrpc": "2.0",
10109                "id": 3,
10110                "method": "tools/call",
10111                "params": {
10112                    "name": "memory_remember",
10113                    "arguments": { "content": needle },
10114                },
10115            });
10116            let (status, _body) = call(r.clone(), "POST", "/mcp", Some(req)).await;
10117            assert_eq!(status, StatusCode::OK);
10118
10119            // Recall via the same /mcp transport.
10120            let req = json!({
10121                "jsonrpc": "2.0",
10122                "id": 4,
10123                "method": "tools/call",
10124                "params": {
10125                    "name": "memory_recall",
10126                    "arguments": { "query": needle, "limit": 5 },
10127                },
10128            });
10129            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
10130            assert_eq!(status, StatusCode::OK);
10131            let recall_text = body
10132                .pointer("/result/content/0/text")
10133                .and_then(|v| v.as_str())
10134                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
10135            assert!(
10136                recall_text.contains(needle),
10137                "recall didn't surface needle `{needle}`: {recall_text}"
10138            );
10139        });
10140        h.shutdown(&runtime);
10141    }
10142
10143    /// Malformed JSON body must surface as 400 (the wire envelope is
10144    /// invalid; the JSON-RPC layer never sees the request). The error
10145    /// body shape matches the rest of the API (`{error, status}`) so
10146    /// existing client error-handling paths keep working.
10147    #[test]
10148    fn mcp_http_malformed_body_returns_400() {
10149        let runtime = rt();
10150        let h = Harness::new(&runtime);
10151        let r = h.router.clone();
10152        runtime.block_on(async move {
10153            let req = Request::builder()
10154                .method("POST")
10155                .uri("/mcp")
10156                .header("content-type", "application/json")
10157                .body(Body::from("not-json-at-all".as_bytes()))
10158                .unwrap();
10159            let resp = r.oneshot(req).await.unwrap();
10160            assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
10161            let body_bytes =
10162                resp.into_body().collect().await.unwrap().to_bytes();
10163            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
10164            assert!(
10165                v.get("error")
10166                    .and_then(|e| e.as_str())
10167                    .map(|s| s.contains("invalid JSON-RPC request"))
10168                    .unwrap_or(false),
10169                "got: {v}"
10170            );
10171        });
10172        h.shutdown(&runtime);
10173    }
10174
10175    /// Wrong `jsonrpc` version must surface as 400. JSON-RPC 2.0 §4
10176    /// requires the literal string `"2.0"`.
10177    #[test]
10178    fn mcp_http_wrong_jsonrpc_version_returns_400() {
10179        let runtime = rt();
10180        let h = Harness::new(&runtime);
10181        let r = h.router.clone();
10182        runtime.block_on(async move {
10183            let req = json!({
10184                "jsonrpc": "1.0",
10185                "id": 1,
10186                "method": "tools/list",
10187            });
10188            let (status, _body) = call(r, "POST", "/mcp", Some(req)).await;
10189            assert_eq!(status, StatusCode::BAD_REQUEST);
10190        });
10191        h.shutdown(&runtime);
10192    }
10193
10194    /// Unknown method returns a JSON-RPC error envelope with code
10195    /// -32601 (METHOD_NOT_FOUND). HTTP status stays 200 because the
10196    /// envelope itself parsed fine — JSON-RPC errors are in-body.
10197    #[test]
10198    fn mcp_http_unknown_method_returns_in_body_method_not_found() {
10199        let runtime = rt();
10200        let h = Harness::new(&runtime);
10201        let r = h.router.clone();
10202        runtime.block_on(async move {
10203            let req = json!({
10204                "jsonrpc": "2.0",
10205                "id": 5,
10206                "method": "definitely/not/a/method",
10207            });
10208            let (status, body) = call(r, "POST", "/mcp", Some(req)).await;
10209            assert_eq!(status, StatusCode::OK);
10210            assert_eq!(
10211                body.pointer("/error/code").and_then(|v| v.as_i64()),
10212                Some(-32601),
10213                "expected JSON-RPC METHOD_NOT_FOUND (-32601), got: {body}"
10214            );
10215        });
10216        h.shutdown(&runtime);
10217    }
10218
/// With the bearer-auth middleware enabled, `POST /mcp` answers 401
/// when no token is presented and 200 once the correct token is
/// supplied.
#[test]
fn mcp_http_post_respects_bearer_auth() {
    let runtime = rt();
    let harness = Harness::new_with_auth(&runtime, Some("secret-mcp-token".into()));
    let router = harness.router.clone();
    runtime.block_on(async move {
        // The same `tools/list` envelope is sent twice: once
        // anonymously, once with credentials.
        let list_tools = json!({
            "jsonrpc": "2.0",
            "id": 6,
            "method": "tools/list",
        });

        // Missing Authorization header → 401.
        let (anon_status, _anon_body) =
            call(router.clone(), "POST", "/mcp", Some(list_tools.clone())).await;
        assert_eq!(anon_status, StatusCode::UNAUTHORIZED);

        // Correct bearer token → 200 plus a well-formed JSON-RPC reply.
        let (authed_status, body) = call_with_auth(
            router,
            "POST",
            "/mcp",
            Some(list_tools),
            Some("Bearer secret-mcp-token"),
        )
        .await;
        assert_eq!(authed_status, StatusCode::OK);

        let tool_count = body
            .pointer("/result/tools")
            .and_then(|tools| tools.as_array())
            .map(Vec::len);
        assert_eq!(
            tool_count,
            Some(14),
            "authed tools/list should still return 14 tools: {body}"
        );
    });
    harness.shutdown(&runtime);
}
10254
/// A CORS preflight (`OPTIONS /mcp`) sent from a localhost origin is
/// answered with 200 (tower-http's CorsLayer handles preflight
/// implicitly), and its `access-control-allow-headers` lists both
/// `x-solo-tenant` and `mcp-session-id`. Pins the v0.10.2
/// allow-list addition.
#[test]
fn mcp_http_cors_preflight_allows_mcp_session_id_header() {
    let runtime = rt();
    let harness = Harness::new(&runtime);
    let router = harness.router.clone();
    runtime.block_on(async move {
        let origin = "http://localhost:5173";
        let preflight = Request::builder()
            .method("OPTIONS")
            .uri("/mcp")
            .header("origin", origin)
            .header("access-control-request-method", "POST")
            .header(
                "access-control-request-headers",
                "content-type, mcp-session-id, x-solo-tenant, authorization",
            )
            .body(Body::empty())
            .unwrap();
        let response = router.oneshot(preflight).await.unwrap();

        // A permitted preflight comes back from tower-http's CorsLayer as 200.
        assert_eq!(response.status(), StatusCode::OK);

        let headers = response.headers();

        // Lower-cased so the substring checks below ignore casing;
        // a missing or non-UTF-8 header reads as the empty string.
        let allow_headers = headers
            .get("access-control-allow-headers")
            .and_then(|value| value.to_str().ok())
            .unwrap_or("")
            .to_lowercase();
        assert!(
            allow_headers.contains("mcp-session-id"),
            "preflight allow-headers must include mcp-session-id; got: {allow_headers}"
        );
        assert!(
            allow_headers.contains("x-solo-tenant"),
            "preflight allow-headers must still include x-solo-tenant; got: {allow_headers}"
        );

        // The permissive-localhost predicate must echo the request's
        // origin back in allow-origin.
        let allow_origin = headers
            .get("access-control-allow-origin")
            .and_then(|value| value.to_str().ok())
            .unwrap_or("");
        assert_eq!(allow_origin, origin);
    });
    harness.shutdown(&runtime);
}
10305
/// A notification (a message without an `id`) is acknowledged with
/// 202 Accepted and an empty body. Per JSON-RPC 2.0 §4.1 the server
/// MUST NOT reply to it.
#[test]
fn mcp_http_notification_returns_202_accepted() {
    let runtime = rt();
    let harness = Harness::new(&runtime);
    let router = harness.router.clone();
    runtime.block_on(async move {
        // No `id` member: this is what makes the message a notification.
        let notification = json!({
            "jsonrpc": "2.0",
            "method": "notifications/initialized",
            "params": {},
        });
        let (status, body) = call(router, "POST", "/mcp", Some(notification)).await;
        assert_eq!(status, StatusCode::ACCEPTED);

        // call() yields Value::Null for an empty response body, so
        // Null here means nothing was sent back.
        assert_eq!(body, Value::Null);
    });
    harness.shutdown(&runtime);
}
10327
/// `initialize` reports the `{name: "solo", version: <crate
/// version>}` server-info that the stdio invariant test
/// `server_info_identity_is_solo_not_rmcp_or_solo_api` pins. This is
/// a sanity check that the v0.10.2 HTTP transport does not drift
/// away from the stdio identity.
#[test]
fn mcp_http_initialize_returns_solo_server_info() {
    let runtime = rt();
    let harness = Harness::new(&runtime);
    let router = harness.router.clone();
    runtime.block_on(async move {
        let initialize = json!({
            "jsonrpc": "2.0",
            "id": 7,
            "method": "initialize",
            "params": {
                "protocolVersion": "2024-11-05",
                "capabilities": {},
                "clientInfo": { "name": "solo-http-test", "version": "0.0.0" },
            },
        });
        let (status, body) = call(router, "POST", "/mcp", Some(initialize)).await;
        assert_eq!(status, StatusCode::OK);

        let server_name = body
            .pointer("/result/serverInfo/name")
            .and_then(|name| name.as_str());
        assert_eq!(
            server_name,
            Some("solo"),
            "serverInfo.name must be `solo`, not `solo-api` or `rmcp`; got: {body}"
        );

        // The dispatcher currently emits a static `protocolVersion`
        // (2024-11-05), whereas the stdio loop emits rmcp's own
        // default. Cross-checking that the two stay aligned belongs
        // to the v0.10.3+ session work; for v0.10.2 only the
        // HTTP-side value is pinned.
        let protocol_version = body
            .pointer("/result/protocolVersion")
            .and_then(|version| version.as_str());
        assert_eq!(protocol_version, Some("2024-11-05"));
    });
    harness.shutdown(&runtime);
}
10368}
10369
#[cfg(test)]
mod cors_tests {
    use super::is_localhost_origin;

    /// `localhost`, `127.0.0.1` and `[::1]` are accepted, with and
    /// without an explicit port.
    #[test]
    fn accepts_canonical_localhost_origins() {
        let accepted = [
            "http://localhost",
            "http://localhost:3000",
            "https://localhost:8443",
            "http://127.0.0.1",
            "http://127.0.0.1:5173",
            "http://[::1]",
            "http://[::1]:8080",
        ];
        for origin in accepted {
            assert!(
                is_localhost_origin(origin),
                "expected {origin:?} to be accepted as a localhost origin"
            );
        }
    }

    /// Public hostnames and private-network IP addresses are refused.
    #[test]
    fn rejects_remote_origins() {
        let rejected = [
            "http://example.com",
            "https://malicious.example",
            "http://192.168.1.5",
            "http://10.0.0.1",
        ];
        for origin in rejected {
            assert!(
                !is_localhost_origin(origin),
                "expected {origin:?} to be rejected as a remote origin"
            );
        }
    }

    /// nip.io and friends: DNS names that resolve to 127.0.0.1 while
    /// the Origin header still carries the public-DNS name. Refusing
    /// them closes the rebinding-via-Origin gap.
    #[test]
    fn rejects_dns_rebinding_tricks() {
        let rejected = [
            "http://127.0.0.1.nip.io",
            "http://localhost.evil.com",
            "http://evil.localhost",
        ];
        for origin in rejected {
            assert!(
                !is_localhost_origin(origin),
                "expected {origin:?} to be rejected as a rebinding trick"
            );
        }
    }

    /// Schemes other than http/https are refused even when the host
    /// is `localhost`.
    #[test]
    fn rejects_non_http_schemes() {
        let rejected = ["file:///", "ws://localhost:3000", "javascript:alert(1)"];
        for origin in rejected {
            assert!(
                !is_localhost_origin(origin),
                "expected {origin:?} to be rejected for its scheme"
            );
        }
    }

    /// Empty input and scheme-less strings are refused.
    #[test]
    fn rejects_malformed() {
        let rejected = ["", "localhost", "//localhost"];
        for origin in rejected {
            assert!(
                !is_localhost_origin(origin),
                "expected {origin:?} to be rejected as malformed"
            );
        }
    }
}
10417