solo-api 0.10.2

// SPDX-License-Identifier: Apache-2.0

//! HTTP/JSON transport for Solo. Local-only by default — binds to
//! `127.0.0.1:<port>` and serves the same operations the MCP server
//! exposes:
//!
//! Episode operations:
//!   - `POST /memory`                — remember (body: { content, source_type?, source_id? })
//!   - `POST /memory/search`         — recall  (body: { query, limit? })
//!   - `GET  /memory/{id}`           — inspect
//!   - `DELETE /memory/{id}?reason=…` — forget
//!
//! Maintenance:
//!   - `POST /memory/consolidate`    — trigger a consolidation pass
//!   - `POST /backup`                — encrypted online backup
//!
//! Derived-layer (v0.4.0+; queries against the Steward's outputs):
//!   - `GET  /memory/themes?window_days=N&limit=K`
//!   - `GET  /memory/facts_about?subject=X&predicate=Y&since_ms=N&until_ms=N&include_as_object=B&limit=K`
//!   - `GET  /memory/contradictions?limit=K`
//!   - `GET  /memory/clusters/{cluster_id}?full_content=true` (v0.5.0+)
//!
//! Document operations (v0.7.0+):
//!   - `POST   /memory/documents`               — ingest a file
//!   - `POST   /memory/documents/search`        — vector search over chunks
//!   - `GET    /memory/documents`               — paginate documents
//!   - `GET    /memory/documents/{id}`          — inspect one document
//!   - `DELETE /memory/documents/{id}`          — soft-delete a document
//!
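//! Auth is optional at this layer. With no auth configured the server
//! runs unauthenticated: the threat model is local-machine single-user,
//! and binding to `127.0.0.1` keeps the surface off the LAN. Bearer-token
//! and OIDC auth can be enabled through `router_with_auth_config`;
//! `GET /health` and `GET /openapi.json` stay exempt from auth either way.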
//!
//! ## Lifecycle
//!
//! `serve_http(addr, state, bearer_token, shutdown)` binds to `addr`,
//! runs axum with `with_graceful_shutdown(shutdown)`, and returns when
//! shutdown fires or the listener errors. `solo http-serve` invokes this
//! from inside a `OneShotContext`, so writer + reader pool + lockfile stay
//! live for the server's lifetime and clean up properly afterwards.

use std::convert::Infallible;
use std::net::SocketAddr;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;

use axum::extract::{FromRequestParts, Path, Query, State};
use axum::http::request::Parts;
use axum::http::{HeaderValue, Method, StatusCode};
use axum::response::sse::{Event, KeepAlive, Sse};
use axum::response::{IntoResponse, Response};
use axum::routing::{get, post};
use axum::{Json, Router};
use futures::Stream;
use serde::{Deserialize, Serialize};
use solo_core::{
    Confidence, DocumentId, EncodingContext, Episode, InvalidateEvent, MemoryId, TenantId,
    Tier,
};
use solo_storage::{TenantHandle, TenantRegistry};
use tokio::sync::broadcast;
use tower_http::cors::{AllowOrigin, CorsLayer};
use tower_http::trace::TraceLayer;

use crate::auth::{AuthConfig, AuthenticatedPrincipal, middleware::AuthValidator};

/// HTTP-side application state. v0.8.0 P2 swapped per-handler `WriteHandle
/// + ReaderPool + ...` for a `TenantRegistry`; the tenant is resolved on
/// each request by `TenantExtractor` (authenticated principal's tenant
/// claim first, then the `X-Solo-Tenant` header, then `default_tenant`).
///
/// Derives `Clone` so the router and the auth setup can each take a copy;
/// the two heavier fields are behind `Arc`.
#[derive(Clone)]
pub struct SoloHttpState {
    /// Multi-tenant registry. Lazy-loads tenants on first request.
    pub registry: Arc<TenantRegistry>,
    /// Fallback tenant used when a request carries neither a principal
    /// tenant claim nor the `X-Solo-Tenant` header. Also handed to the
    /// auth validator when auth is configured.
    /// Typically `TenantId::default_tenant()`.
    pub default_tenant: TenantId,
    /// Read-path aliases for the canonical `"user"` subject. Sourced
    /// from `solo.config.toml` `[identity] user_aliases`; threaded
    /// through to `solo_query::facts_about` so a query for `"alex"`
    /// also surfaces rows historically extracted as `"user"`. Empty
    /// vec = behave as today. Wrapped in `Arc` so handler `clone()`s
    /// stay cheap. v0.5.0 Priority 1 sub-step 1C.
    pub user_aliases: Arc<Vec<String>>,
}

/// Name of the HTTP header that routes a request to a specific tenant.
/// Optional: when it is absent (and no authenticated principal supplies
/// a tenant claim) the request falls back to `state.default_tenant`.
pub const TENANT_HEADER: &str = "x-solo-tenant";

/// Axum extractor that resolves the request's target tenant, then
/// lazy-opens the tenant via the registry.
///
/// Resolution order (v0.8.0 P3):
///   1. `AuthenticatedPrincipal.tenant_claim` from request extensions —
///      set by the auth middleware. In OIDC mode this is the validated
///      value of the configured custom claim (default `solo_tenant`);
///      in bearer mode this is the daemon's default tenant.
///   2. `X-Solo-Tenant` header — falls back to this when no
///      authenticated principal is on the request (unauthenticated
///      loopback deployments — the default).
///   3. `state.default_tenant` when neither is present.
///
/// Bad header values → 400. Lazy-open failures → 500 unless the failure
/// kind is `NotFound` (unknown tenant id) → 404.
pub struct TenantExtractor(pub Arc<TenantHandle>);

impl<S> FromRequestParts<S> for TenantExtractor
where
    SoloHttpState: FromRef<S>,
    S: Send + Sync,
{
    type Rejection = ApiError;

    async fn from_request_parts(parts: &mut Parts, state: &S) -> Result<Self, Self::Rejection> {
        let state = SoloHttpState::from_ref(state);
        // Order: (1) principal.tenant_claim (set by auth middleware),
        // (2) X-Solo-Tenant header, (3) state.default_tenant.
        //
        // The principal wins because in OIDC mode the JWT is the source
        // of truth — letting the header override an OIDC claim would
        // be a tenant-impersonation hole.
        let resolved = if let Some(principal) = parts.extensions.get::<AuthenticatedPrincipal>()
            && let Some(claim) = principal.tenant_claim.clone()
        {
            claim
        } else {
            match parts.headers.get(TENANT_HEADER) {
                None => state.default_tenant.clone(),
                Some(raw) => {
                    let s = raw.to_str().map_err(|e| {
                        ApiError::bad_request(format!(
                            "{TENANT_HEADER}: header value must be ASCII ({e})"
                        ))
                    })?;
                    TenantId::new(s.to_string()).map_err(|e| {
                        ApiError::bad_request(format!("{TENANT_HEADER}: invalid tenant id: {e}"))
                    })?
                }
            }
        };
        let handle = state.registry.get_or_open(&resolved).await.map_err(|e| {
            // Map NotFound → 404; everything else → 500.
            use solo_core::Error;
            match &e {
                Error::NotFound(_) => ApiError::not_found(e.to_string()),
                Error::InvalidInput(_) => ApiError::bad_request(e.to_string()),
                _ => ApiError::internal(e.to_string()),
            }
        })?;
        Ok(TenantExtractor(handle))
    }
}

use axum::extract::FromRef;

/// v0.8.0 P4: extractor that pulls the authenticated principal's
/// `subject` (JWT `sub` or `"bearer"`) out of request extensions for the
/// audit log. `None` when no `AuthenticatedPrincipal` is present
/// (unauthenticated loopback deployments).
pub struct AuditPrincipal(pub Option<String>);

impl<S> FromRequestParts<S> for AuditPrincipal
where
    S: Send + Sync,
{
    type Rejection = std::convert::Infallible;

    async fn from_request_parts(
        parts: &mut Parts,
        _state: &S,
    ) -> Result<Self, Self::Rejection> {
        Ok(AuditPrincipal(
            parts
                .extensions
                .get::<AuthenticatedPrincipal>()
                .map(|p| p.subject.clone()),
        ))
    }
}

/// v0.10.0: extractor that lifts the full `AuthenticatedPrincipal` out
/// of request extensions for the `/v1/tenants` handler. Distinct from
/// `AuditPrincipal` (which only carries `subject: Option<String>`) — the
/// tenant-list handler needs the `tenant_claim` and `claims` fields to
/// distinguish bearer (claims = Null) from OIDC (claims = JWT object)
/// principals.
///
/// `None` when no `AuthenticatedPrincipal` is on the request — the
/// unauthenticated loopback deployment path, which the tenant-list
/// handler treats as "all tenants visible" (same scope as the
/// `solo tenants list` CLI). See `docs/dev-log/0119-tenants-list-impl.md`
/// for the three-case visibility rule.
pub struct MaybePrincipal(pub Option<AuthenticatedPrincipal>);

impl<S> FromRequestParts<S> for MaybePrincipal
where
    S: Send + Sync,
{
    type Rejection = std::convert::Infallible;

    async fn from_request_parts(
        parts: &mut Parts,
        _state: &S,
    ) -> Result<Self, Self::Rejection> {
        Ok(MaybePrincipal(
            parts
                .extensions
                .get::<AuthenticatedPrincipal>()
                .cloned(),
        ))
    }
}

/// Build the router with optional bearer-token auth (v0.7.x legacy shape).
///
/// With `bearer_token = Some(t)`, every request other than `GET /health`
/// and `GET /openapi.json` (unauthenticated probes / machine-readable
/// spec) must carry `Authorization: Bearer t`. With `None` the router is
/// unauthenticated.
///
/// Since v0.8.0 P3 this is a thin adapter: the token is wrapped in
/// `AuthConfig::Bearer` and handed to [`router_with_auth_config`], so the
/// `AuthValidator::Bearer` middleware attaches an `AuthenticatedPrincipal`
/// to every authenticated request (the `TenantExtractor` reads
/// `principal.tenant_claim` ahead of the `X-Solo-Tenant` header).
pub fn router_with_auth(state: SoloHttpState, bearer_token: Option<String>) -> Router {
    router_with_auth_config(
        state,
        bearer_token.map(|token| AuthConfig::Bearer { token }),
    )
}

/// Build the router from a config-driven auth block (v0.8.0 P3+).
///
/// * `auth = Some(AuthConfig::Bearer { token })` — same as passing
///   `Some(token)` to [`router_with_auth`].
/// * `auth = Some(AuthConfig::Oidc { … })` — installs the OIDC middleware
///   (JWKS fetch + cache + sig + claim checks).
/// * `auth = None` — unauthenticated, same `127.0.0.1` default as v0.7.x.
///
/// The public routes (`/health`, `/openapi.json`) never pass through the
/// auth middleware — load balancers, uptime monitors, and codegen tools
/// shouldn't need credentials. CORS and HTTP tracing wrap both groups.
pub fn router_with_auth_config(state: SoloHttpState, auth: Option<AuthConfig>) -> Router {
    // Always-unauthenticated routes:
    //   - GET /health: liveness probe (load balancers, uptime monitors).
    //   - GET /openapi.json: machine-readable API description for client
    //     codegen + browser-UI tooling. It describes the API shape, not
    //     secrets, so it is fine to serve without credentials even on a
    //     LAN-bound instance.
    let open_routes = Router::new()
        .route("/health", get(|| async { "ok" }))
        .route("/openapi.json", get(openapi_handler));

    let protected = Router::new()
        // Episode operations + maintenance.
        .route("/memory", post(remember_handler))
        .route("/memory/search", post(recall_handler))
        .route("/memory/consolidate", post(consolidate_handler))
        .route("/memory/{id}", get(inspect_handler).delete(forget_handler))
        .route("/backup", post(backup_handler))
        // Derived-layer reads (v0.4.0+). These are pure read-only
        // queries, hence GET with query-string filters: curl-friendly,
        // no JSON body needed.
        .route("/memory/themes", get(themes_handler))
        .route("/memory/facts_about", get(facts_about_handler))
        .route("/memory/contradictions", get(contradictions_handler))
        // v0.5.0 Priority 3: one cluster + abstraction + episodes. The
        // path has two segments after `/memory`, so it cannot collide
        // with the single-segment `/memory/{id}` UUID inspect route.
        .route("/memory/clusters/{cluster_id}", get(inspect_cluster_handler))
        // v0.7.0 P6: document operations, likewise under a two-segment
        // prefix (`/memory/documents/...`) to stay clear of
        // `/memory/{id}`. The literal `/memory/documents/search` is
        // listed before the parameterised `/memory/documents/{id}`.
        .route("/memory/documents/search", post(search_docs_handler))
        .route(
            "/memory/documents",
            post(ingest_document_handler).get(list_documents_handler),
        )
        .route(
            "/memory/documents/{id}",
            get(inspect_document_handler).delete(forget_document_handler),
        )
        // v0.9.x: read-only neighbor expansion from any node in the
        // memory graph, for solo-web. First member of the `/v1/graph/*`
        // family; see `docs/dev-log/0105-solo-web-scoping.md` §4 + the
        // impl dev log.
        .route("/v1/graph/expand", get(graph_expand_handler))
        // v0.10.0: paginated catalog reads backing solo-web's initial
        // graph render. See `docs/dev-log/0114-graph-nodes-edges-impl.md`
        // alongside the same scoping doc.
        .route("/v1/graph/nodes", get(graph_nodes_handler))
        .route("/v1/graph/edges", get(graph_edges_handler))
        // v0.10.0: kind-discriminated full-record drill for solo-web's
        // inspector panel. See `docs/dev-log/0115-graph-inspect-impl.md`.
        .route("/v1/graph/inspect/{id}", get(graph_inspect_handler))
        // v0.10.0: unified explicit + HNSW-semantic neighbors for
        // solo-web's "show similar" overlay. See
        // `docs/dev-log/0116-graph-neighbors-impl.md`.
        .route("/v1/graph/neighbors/{id}", get(graph_neighbors_handler))
        // v0.10.0: Server-Sent Events stream of graph-data invalidations
        // for solo-web's live updates. Events are INVALIDATION-shaped
        // (`{reason, tenant_id, ts_ms, kind}`) per scoping doc §3
        // Decision C — clients refetch the affected page instead of
        // receiving row payloads. See
        // `docs/dev-log/0117-graph-stream-impl.md`.
        .route("/v1/graph/stream", get(graph_stream_handler))
        // v0.10.0: principal-scoped tenant list for solo-web's top-bar
        // tenant picker. Read-only — admin CRUD (create/delete) remains
        // CLI-only per ADR-0004 §"Admin operations". Visibility is
        // principal-driven: no-auth + bearer principals see every active
        // tenant; OIDC principals see only the tenant named by their
        // `tenant_claim`. See `docs/dev-log/0119-tenants-list-impl.md` +
        // scoping doc §3 Decision F + §4 Route 6.
        .route("/v1/tenants", get(tenants_list_handler))
        // v0.10.2: MCP-over-HTTP transport on /mcp, so a single Solo
        // process can serve `/v1/graph/*` (REST, for solo-web) and
        // `/mcp` (JSON-RPC, for solo-jarvis) without the
        // single-writer-per-data-dir lock dance. See
        // `docs/dev-log/0129-v0.10.2-mcp-over-http-impl.md` for the spec.
        // POST and GET share the path and are dispatched by method;
        // OPTIONS is left to the `CorsLayer` applied at the bottom of
        // this function, so no explicit handler is registered for it.
        .route("/mcp", post(mcp_http_post_handler).get(mcp_http_get_handler))
        .with_state(state.clone());

    // v0.8.0 P3: when auth is configured, gate the protected group behind
    // the AuthValidator middleware (bearer | OIDC). It inserts an
    // AuthenticatedPrincipal into request extensions for the
    // TenantExtractor + audit log to read.
    let protected = match auth {
        Some(cfg) => {
            let validator = Arc::new(AuthValidator::from_config(
                &cfg,
                state.default_tenant.clone(),
            ));
            protected.layer(axum::middleware::from_fn_with_state(
                validator,
                crate::auth::middleware::auth_middleware,
            ))
        }
        None => protected,
    };

    open_routes
        .merge(protected)
        .layer(build_cors_layer())
        .layer(TraceLayer::new_for_http())
}

/// Convenience wrapper: no auth (loopback-only deployments).
pub fn router(state: SoloHttpState) -> Router {
    router_with_auth_config(state, None)
}

fn build_cors_layer() -> CorsLayer {
    // Permissive-localhost CORS: allow any localhost / 127.0.0.1 origin so
    // browser-based UIs running on a different local port can call the API
    // without preflight friction. We do NOT use `Any` because that would
    // allow arbitrary remote origins to talk to our localhost server via
    // a victim's browser. With bearer-token auth enabled the practical
    // impact is reduced (the cross-origin attacker still can't supply
    // the token), but principle of least privilege says refuse anyway.
    //
    // When the server is bound to a non-loopback address (auth required),
    // the same CORS predicate keeps localhost-only browser clients —
    // suitable for trusted-LAN deployments where the LAN client itself
    // tunnels through ssh/wireguard back to localhost. Wider CORS for
    // genuine cross-origin browser use is a future config knob.
    CorsLayer::new()
        .allow_origin(AllowOrigin::predicate(|origin: &HeaderValue, _req| {
            origin
                .to_str()
                .map(is_localhost_origin)
                .unwrap_or(false)
        }))
        .allow_methods([Method::GET, Method::POST, Method::DELETE, Method::OPTIONS])
        .allow_headers([
            axum::http::header::CONTENT_TYPE,
            axum::http::header::AUTHORIZATION,
            // Custom Solo headers — browsers preflight-check these and
            // refuse the actual request if they're not in the allow list.
            // Without `x-solo-tenant` solo-web's browser fetches all fail
            // with "Failed to fetch" (CORS preflight rejection).
            axum::http::HeaderName::from_static("x-solo-tenant"),
            // v0.10.2: `Mcp-Session-Id` is part of the MCP Streamable
            // HTTP transport spec (sessions, resumable streams). Solo's
            // v0.10.2 `/mcp` route does NOT implement sessions yet —
            // each POST is one-shot — but the header is in the
            // allow-list ahead of time so browser-based MCP clients
            // that preflight for it (per the spec) succeed instead of
            // failing with a CORS error before the first request even
            // lands. v0.10.3+ wires the actual session affinity.
            axum::http::HeaderName::from_static("mcp-session-id"),
        ])
}

/// True if `origin` is `http(s)://localhost[:port]`,
/// `http(s)://127.0.0.1[:port]`, or `http(s)://[::1][:port]` (loopback
/// IPv6). Everything else is rejected, including:
///
/// * look-alike hosts such as `127.0.0.1.nip.io`;
/// * userinfo tricks such as `http://localhost:80@evil.com`;
/// * trailing text after an IPv6 literal such as `http://[::1].evil.com`.
///
/// A port, when present, must be one or more ASCII digits. A trailing
/// path is tolerated and ignored (it shouldn't appear on `Origin`
/// headers, but we defend anyway).
fn is_localhost_origin(origin: &str) -> bool {
    let Some(rest) = origin
        .strip_prefix("http://")
        .or_else(|| origin.strip_prefix("https://"))
    else {
        return false;
    };

    // Drop any path component; only the authority matters.
    let authority = rest.split('/').next().unwrap_or(rest);

    // Split the authority into the host and whatever follows it. For a
    // bracketed IPv6 literal the host ends at the closing `]` (the colons
    // inside the brackets are not port separators); otherwise it ends at
    // the first `:`.
    let (host, suffix) = if authority.starts_with('[') {
        match authority.find(']') {
            // `]` is a single byte, so `close + 1` is a char boundary.
            Some(close) => authority.split_at(close + 1),
            None => return false,
        }
    } else {
        match authority.find(':') {
            Some(colon) => authority.split_at(colon),
            None => (authority, ""),
        }
    };

    // What follows the host must be nothing or `:<digits>`. Validating
    // this — rather than discarding it — is what rejects
    // `localhost:80@evil.com` and `[::1].evil.com`.
    let suffix_ok = match suffix.strip_prefix(':') {
        Some(port) => !port.is_empty() && port.bytes().all(|b| b.is_ascii_digit()),
        None => suffix.is_empty(),
    };

    suffix_ok && matches!(host, "localhost" | "127.0.0.1" | "[::1]")
}

/// Bind + serve (v0.7.x legacy shape).
///
/// `shutdown` is awaited inside axum's `with_graceful_shutdown`; once it
/// resolves the server drains cleanly. `bearer_token = None` runs
/// unauthenticated (loopback default); `Some(t)` requires
/// `Authorization: Bearer t` on every request except `GET /health` +
/// `GET /openapi.json`.
///
/// Thin adapter over [`serve_http_with_auth_config`]: the token, if any,
/// is wrapped in `AuthConfig::Bearer`.
pub async fn serve_http(
    addr: SocketAddr,
    state: SoloHttpState,
    bearer_token: Option<String>,
    shutdown: impl std::future::Future<Output = ()> + Send + 'static,
) -> std::io::Result<()> {
    serve_http_with_auth_config(
        addr,
        state,
        bearer_token.map(|token| AuthConfig::Bearer { token }),
        shutdown,
    )
    .await
}

/// Bind + serve with a config-driven auth block (v0.8.0 P3+).
///
/// `auth = None` runs unauthenticated; see [`router_with_auth_config`]
/// for what each auth mode means. The router is built first, then the
/// listener is bound (a bind failure is returned as the `io::Error`),
/// and the server runs until `shutdown` resolves or serving fails.
pub async fn serve_http_with_auth_config(
    addr: SocketAddr,
    state: SoloHttpState,
    auth: Option<AuthConfig>,
    shutdown: impl std::future::Future<Output = ()> + Send + 'static,
) -> std::io::Result<()> {
    // Label for the startup log line; taken before `auth` is moved into
    // the router builder.
    let auth_kind = auth.as_ref().map_or("none", |cfg| match cfg {
        AuthConfig::Bearer { .. } => "bearer",
        AuthConfig::Oidc { .. } => "oidc",
    });

    let app = router_with_auth_config(state, auth);
    let listener = tokio::net::TcpListener::bind(addr).await?;
    tracing::info!(%addr, auth = auth_kind, "solo http: listening");

    let server = axum::serve(listener, app).with_graceful_shutdown(shutdown);
    server.await
}

// ---------------------------------------------------------------------------
// OpenAPI 3.1 spec
// ---------------------------------------------------------------------------

/// Serve the hand-crafted OpenAPI 3.1 spec at `GET /openapi.json`.
///
/// Takes no extractors and simply wraps the value returned by
/// `openapi_spec()` in a JSON response.
///
/// We keep the spec hand-written (rather than deriving via `utoipa`).
/// The original v0.1 rationale: 4 simple endpoints, types live across
/// crate boundaries (`solo_query::RecallResult`,
/// `solo_query::EpisodeRecord`), and a `utoipa` retrofit would touch
/// every crate. Hand-crafted is one JSON literal in this file; a smoke
/// test in `handler_tests` parses the response and asserts the expected
/// paths + components are present, so drift between spec and code is
/// caught at PR time.
///
/// NOTE(review): the router in this file now registers far more than 4
/// endpoints, so the "4 simple endpoints" part of the rationale is
/// stale — confirm the hand-written spec still covers every route.
async fn openapi_handler() -> Json<serde_json::Value> {
    Json(openapi_spec())
}

/// Build the OpenAPI 3.1 spec describing Solo's HTTP transport.
/// Public so the smoke test + future client-codegen tooling can
/// produce the same document without spinning up the server.
pub fn openapi_spec() -> serde_json::Value {
    serde_json::json!({
        "openapi": "3.1.0",
        "info": {
            "title": "Solo HTTP API",
            "description":
                "Local-first personal memory daemon. The HTTP transport \
                 mirrors the four MCP tools (memory_remember / recall / \
                 inspect / forget). Default deployment is loopback-only \
                 (127.0.0.1); LAN-bound deployments require a bearer \
                 token via `solo http-serve --bind <ip> --bearer-token-file <path>`.",
            "version": env!("CARGO_PKG_VERSION"),
            "license": { "name": "Apache-2.0" }
        },
        "servers": [
            { "url": "http://127.0.0.1:7437", "description": "Default loopback (replace port with your --http-port)" }
        ],
        "components": {
            "securitySchemes": {
                "bearerAuth": {
                    "type": "http",
                    "scheme": "bearer",
                    "description":
                        "Bearer-token auth. Required only on LAN-bound deployments \
                         (`solo http-serve --bind <non-loopback> --bearer-token-file <path>`); \
                         the default `127.0.0.1` deployment is unauthenticated. \
                         `GET /health` and `GET /openapi.json` are exempt from auth even \
                         on bearer-protected instances."
                }
            },
            "schemas": {
                "RememberRequest": {
                    "type": "object",
                    "required": ["content"],
                    "properties": {
                        "content": { "type": "string", "minLength": 1, "description": "Episode content to embed + store." },
                        "source_type": { "type": "string", "description": "Free-form source tag (e.g. `user_message`, `tool_output`). Defaults to `user_message`." },
                        "source_id": { "type": "string", "description": "Optional upstream ID for traceability." }
                    },
                    "additionalProperties": false
                },
                "RememberResponse": {
                    "type": "object",
                    "required": ["memory_id"],
                    "properties": {
                        "memory_id": { "type": "string", "format": "uuid", "description": "UUID v7 assigned to the new episode." }
                    }
                },
                "RecallRequest": {
                    "type": "object",
                    "required": ["query"],
                    "properties": {
                        "query": { "type": "string", "minLength": 1, "description": "Natural-language query; embedded by the same model as stored episodes." },
                        "limit": { "type": "integer", "minimum": 1, "maximum": 50, "default": 5, "description": "Max number of hits to return." }
                    },
                    "additionalProperties": false
                },
                "RecallResult": {
                    "type": "object",
                    "description":
                        "Recall response. Fields are stable across v0.1 but not exhaustively documented here — \
                         see `solo_query::RecallResult` in the source for the canonical shape. \
                         Treat as a forward-compatible JSON object.",
                    "additionalProperties": true
                },
                "ConsolidationScope": {
                    "type": "object",
                    "description": "Filter + flags for consolidation. All fields optional; empty body = unbounded defaults.",
                    "properties": {
                        "window_days": { "type": "integer", "nullable": true, "description": "Restrict to memories with ts_ms >= now - window_days * 86400000. Null/omitted = unbounded." },
                        "force_merge": { "type": "boolean", "default": false, "description": "Run the existing-vs-existing merge + abstraction-regen passes even with zero unclustered candidates. Drift catch-up on quiet corpora. Added in 0.3.1." }
                    },
                    "additionalProperties": false
                },
                "ConsolidationReport": {
                    "type": "object",
                    "required": [
                        "episodes_seen", "clusters_built", "clusters_merged",
                        "clusters_absorbed", "existing_clusters_merged",
                        "episodes_clustered", "abstractions_built",
                        "abstractions_regenerated", "triples_built",
                        "contradictions_found"
                    ],
                    "properties": {
                        "episodes_seen":             { "type": "integer", "minimum": 0 },
                        "clusters_built":            { "type": "integer", "minimum": 0, "description": "Brand-new clusters that survived to be persisted (post in-run-merge, post cross-run-absorb)." },
                        "clusters_merged":           { "type": "integer", "minimum": 0, "description": "In-run merge: clusters absorbed into a sibling within this consolidate run (cross-UTC-bucket case). Counts losers." },
                        "clusters_absorbed":         { "type": "integer", "minimum": 0, "description": "Cross-run absorb: freshly-built clusters folded into a pre-existing DB cluster with a similar centroid. Counts new-side clusters." },
                        "existing_clusters_merged":  { "type": "integer", "minimum": 0, "description": "Existing-vs-existing merge: pre-existing DB clusters that drifted toward each other and now coalesce. Counts losers." },
                        "episodes_clustered":        { "type": "integer", "minimum": 0 },
                        "abstractions_built":        { "type": "integer", "minimum": 0, "description": "Fresh abstractions persisted for newly-built clusters. 0 when no LlmClient is wired." },
                        "abstractions_regenerated":  { "type": "integer", "minimum": 0, "description": "Existing clusters whose stale abstractions were dropped and rebuilt because absorb or existing-merge changed their episode set. 0 without an LlmClient." },
                        "triples_built":             { "type": "integer", "minimum": 0 },
                        "contradictions_found":      { "type": "integer", "minimum": 0 }
                    }
                },
                "EpisodeRecord": {
                    "type": "object",
                    "description":
                        "Inspect response: full episode record. Fields are stable across v0.1 but not \
                         exhaustively documented here — see `solo_query::EpisodeRecord` in the source. \
                         Treat as a forward-compatible JSON object.",
                    "additionalProperties": true
                },
                "ThemeHit": {
                    "type": "object",
                    "description":
                        "One cluster + its (optional) abstraction. Returned by GET /memory/themes. \
                         See `solo_query::ThemeHit` for the canonical shape: cluster_id, \
                         abstraction_id?, abstraction_text?, episode_count, coherence, created_at_ms.",
                    "additionalProperties": true
                },
                "FactHit": {
                    "type": "object",
                    "description":
                        "One Steward-extracted SPO triple. Returned by GET /memory/facts_about. \
                         See `solo_query::FactHit` for fields: triple_id, subject_id, predicate, \
                         object_id, object_kind, valid_from_ms, valid_to_ms?, confidence, cluster_id?.",
                    "additionalProperties": true
                },
                "ContradictionHit": {
                    "type": "object",
                    "description":
                        "One Steward-flagged contradiction with each side's triple LEFT JOIN'd in. \
                         Returned by GET /memory/contradictions. See `solo_query::ContradictionHit`: \
                         a_id, b_id, kind, explanation, detected_at_ms, a_triple?, b_triple?.",
                    "additionalProperties": true
                },
                "ClusterRecord": {
                    "type": "object",
                    "description":
                        "Snapshot of one cluster — its row, optional abstraction, and source episodes \
                         (content truncated to 200 chars unless ?full_content=true). Returned by \
                         GET /memory/clusters/{cluster_id}. See `solo_query::ClusterRecord`.",
                    "additionalProperties": true
                },
                "IngestDocumentRequest": {
                    "type": "object",
                    "required": ["path"],
                    "properties": {
                        "path": {
                            "type": "string",
                            "minLength": 1,
                            "description":
                                "Server-side absolute path to the file to ingest. The file must be \
                                 readable by the Solo process. Supported formats: plaintext / \
                                 markdown / code, HTML, PDF."
                        }
                    },
                    "additionalProperties": false
                },
                "IngestReport": {
                    "type": "object",
                    "description":
                        "Returned by POST /memory/documents. Reports the document id assigned, \
                         the number of chunks persisted + embedded, the total byte size, and a \
                         `deduped` flag (true when the same content_hash was already present and \
                         the existing doc_id was returned unchanged). See `solo_storage::IngestReport`.",
                    "required": ["doc_id", "chunks_persisted", "bytes_ingested", "deduped"],
                    "properties": {
                        "doc_id":            { "type": "string", "format": "uuid" },
                        "chunks_persisted":  { "type": "integer", "minimum": 0 },
                        "bytes_ingested":    { "type": "integer", "minimum": 0, "format": "int64" },
                        "deduped":           { "type": "boolean" }
                    },
                    "additionalProperties": false
                },
                "ForgetDocumentReport": {
                    "type": "object",
                    "description":
                        "Returned by DELETE /memory/documents/{id}. Reports the doc_id soft-deleted \
                         and how many chunk rowids were tombstoned in the HNSW index. The chunk rows \
                         themselves survive in SQL for forensic value. See `solo_storage::ForgetDocumentReport`.",
                    "required": ["doc_id", "chunks_tombstoned"],
                    "properties": {
                        "doc_id":             { "type": "string", "format": "uuid" },
                        "chunks_tombstoned":  { "type": "integer", "minimum": 0 }
                    },
                    "additionalProperties": false
                },
                "SearchDocsRequest": {
                    "type": "object",
                    "required": ["query"],
                    "properties": {
                        "query": { "type": "string", "minLength": 1 },
                        "limit": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 }
                    },
                    "additionalProperties": false
                },
                "DocSearchHit": {
                    "type": "object",
                    "description":
                        "One chunk hit + parent-doc context. Fields per `solo_query::DocSearchHit`: \
                         chunk_id, doc_id, doc_title?, doc_source?, doc_mime_type?, chunk_index, \
                         content, cos_distance, start_offset, end_offset.",
                    "additionalProperties": true
                },
                "DocumentInspectResult": {
                    "type": "object",
                    "description":
                        "Returned by GET /memory/documents/{id}. A `document` record (full metadata) \
                         plus an ordered list of chunk summaries (each preview truncated to 200 \
                         chars). See `solo_query::DocumentInspectResult`.",
                    "additionalProperties": true
                },
                "DocumentSummary": {
                    "type": "object",
                    "description":
                        "One row from GET /memory/documents. Fields per `solo_query::DocumentSummary`: \
                         doc_id, title?, source?, mime_type?, ingested_at_ms, chunk_count, status.",
                    "additionalProperties": true
                },
                "ApiError": {
                    "type": "object",
                    "required": ["error", "status"],
                    "properties": {
                        "error": { "type": "string" },
                        "status": { "type": "integer", "minimum": 400, "maximum": 599 }
                    }
                }
            }
        },
        "paths": {
            "/health": {
                "get": {
                    "summary": "Liveness probe",
                    "description": "Returns plain text `ok`. Always unauthenticated.",
                    "responses": {
                        "200": {
                            "description": "Server is up.",
                            "content": { "text/plain": { "schema": { "type": "string", "example": "ok" } } }
                        }
                    }
                }
            },
            "/openapi.json": {
                "get": {
                    "summary": "Self-describing OpenAPI 3.1 spec",
                    "description": "Returns this document. Always unauthenticated.",
                    "responses": {
                        "200": {
                            "description": "OpenAPI 3.1 document.",
                            "content": { "application/json": { "schema": { "type": "object" } } }
                        }
                    }
                }
            },
            "/memory": {
                "post": {
                    "summary": "Remember (store an episode)",
                    "description": "Equivalent to MCP tool `memory_remember`.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "requestBody": {
                        "required": true,
                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RememberRequest" } } }
                    },
                    "responses": {
                        "200": {
                            "description": "Memory stored; returns the new MemoryId.",
                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RememberResponse" } } }
                        },
                        "400": { "description": "Bad request (e.g. empty content).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                }
            },
            "/memory/search": {
                "post": {
                    "summary": "Recall (vector search)",
                    "description": "Equivalent to MCP tool `memory_recall`. Embeds the query, runs HNSW search, returns the top-K hits in cosine-distance order.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "requestBody": {
                        "required": true,
                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RecallRequest" } } }
                    },
                    "responses": {
                        "200": {
                            "description": "Search results.",
                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RecallResult" } } }
                        },
                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                }
            },
            "/memory/consolidate": {
                "post": {
                    "summary": "Run a consolidation pass (clustering + abstraction)",
                    "description":
                        "Idempotent. Triggers the SWS-equivalent clustering pass; if a `Steward` LLM is wired \
                         on the server, also runs the REM-equivalent abstraction pass that populates \
                         `semantic_abstractions` and `triples`. Empty request body = default scope (unbounded \
                         window). Equivalent to the `solo consolidate` CLI.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "requestBody": {
                        "required": false,
                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConsolidationScope" } } }
                    },
                    "responses": {
                        "200": {
                            "description": "Consolidation complete; report counts the work done.",
                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConsolidationReport" } } }
                        },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                }
            },
            "/backup": {
                "post": {
                    "summary": "Online encrypted backup",
                    "description":
                        "Run an online SQLCipher backup of the live data dir to a server-side path. \
                         The destination file is encrypted with the same Argon2id-derived raw key as \
                         the source, so it restores under the same passphrase + a copy of the source's \
                         `solo.config.toml`. Hot — the backup runs against the writer's existing \
                         connection without taking the lockfile, so the daemon keeps serving reads + \
                         writes during the operation. v0.3.2+.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "requestBody": {
                        "required": true,
                        "content": { "application/json": { "schema": {
                            "type": "object",
                            "properties": {
                                "to": { "type": "string", "description": "Server-side absolute path for the backup file." },
                                "force": { "type": "boolean", "description": "Overwrite an existing destination file. Default false.", "default": false }
                            },
                            "required": ["to"]
                        } } }
                    },
                    "responses": {
                        "200": {
                            "description": "Backup complete; reports the destination path + elapsed milliseconds.",
                            "content": { "application/json": { "schema": {
                                "type": "object",
                                "properties": {
                                    "path": { "type": "string" },
                                    "elapsed_ms": { "type": "integer", "format": "int64" }
                                }
                            } } }
                        },
                        "400": { "description": "Destination invalid, exists without force, or its parent doesn't exist." },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." },
                        "500": { "description": "Backup failed (disk full, permission denied, etc.)." }
                    }
                }
            },
            "/memory/{id}": {
                "get": {
                    "summary": "Inspect a memory by ID",
                    "description": "Equivalent to MCP tool `memory_inspect`.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "parameters": [{
                        "name": "id",
                        "in": "path",
                        "required": true,
                        "schema": { "type": "string", "format": "uuid" },
                        "description": "MemoryId (UUID v7)."
                    }],
                    "responses": {
                        "200": {
                            "description": "Episode record.",
                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/EpisodeRecord" } } }
                        },
                        "400": { "description": "Malformed ID.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                },
                "delete": {
                    "summary": "Forget (soft-delete) a memory by ID",
                    "description":
                        "Equivalent to MCP tool `memory_forget`. Soft-delete: flips `episodes.status = 'forgotten'` \
                         and tombstones the HNSW vector. The row + embedding are preserved for forensics; \
                         re-running `solo reembed` after this does NOT restore visibility.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "parameters": [
                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } },
                        { "name": "reason", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Free-form reason logged via tracing (not yet persisted to the DB)." }
                    ],
                    "responses": {
                        "204": { "description": "Forgotten (or already forgotten — idempotent)." },
                        "400": { "description": "Malformed ID.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "404": { "description": "No such memory.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                }
            },
            "/memory/themes": {
                "get": {
                    "summary": "List recent cluster themes",
                    "description":
                        "Equivalent to MCP tool `memory_themes`. List cluster abstractions ordered by \
                         most-recent first. Use to surface 'what has the user been thinking about lately' \
                         without paging through individual episodes. v0.4.0+.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "parameters": [
                        { "name": "window_days", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1 }, "description": "Optional time window. Omit for unfiltered (all-time, most-recent first)." },
                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
                    ],
                    "responses": {
                        "200": {
                            "description": "Array of ThemeHits (possibly empty).",
                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ThemeHit" } } } }
                        },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                }
            },
            "/memory/facts_about": {
                "get": {
                    "summary": "Query the SPO knowledge graph by subject",
                    "description":
                        "Equivalent to MCP tool `memory_facts_about`. Query Steward-extracted triples by \
                         subject + optional predicate + optional time window. Subject is required \
                         (predicate-only scans not supported). Pass `include_as_object=true` (v0.5.1+) \
                         to also surface rows where `subject` appears as the object. v0.4.0+.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "parameters": [
                        { "name": "subject", "in": "query", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Subject id to query (e.g. `Sam`)." },
                        { "name": "predicate", "in": "query", "required": false, "schema": { "type": "string" }, "description": "Optional predicate filter (e.g. `works_at`)." },
                        { "name": "since_ms", "in": "query", "required": false, "schema": { "type": "integer" }, "description": "Optional valid_from_ms lower bound (epoch ms)." },
                        { "name": "until_ms", "in": "query", "required": false, "schema": { "type": "integer" }, "description": "Optional valid_to_ms upper bound (epoch ms). NULL upper bounds (still-valid facts) pass through." },
                        { "name": "include_as_object", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "If true, also match rows where `subject` appears as the object (e.g. surface 'Sam pushes back on PRs about Maya' under subject='Maya'). Default false. v0.5.1+." },
                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
                    ],
                    "responses": {
                        "200": {
                            "description": "Array of FactHits (possibly empty).",
                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/FactHit" } } } }
                        },
                        "400": { "description": "Bad request (e.g. empty subject).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                }
            },
            "/memory/contradictions": {
                "get": {
                    "summary": "List Steward-flagged contradictions",
                    "description":
                        "Equivalent to MCP tool `memory_contradictions`. Each result includes both \
                         sides' triple SPO via LEFT JOIN for context. v0.4.0+.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "parameters": [
                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 5 } }
                    ],
                    "responses": {
                        "200": {
                            "description": "Array of ContradictionHits (possibly empty).",
                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ContradictionHit" } } } }
                        },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                }
            },
            "/memory/clusters/{cluster_id}": {
                "get": {
                    "summary": "Inspect a single cluster",
                    "description":
                        "Equivalent to MCP tool `memory_inspect_cluster`. Returns the cluster row, \
                         its (optional) abstraction, and its source episodes. By default each \
                         episode's `content` is truncated to 200 chars with a trailing `…`. Pass \
                         `?full_content=true` to get verbatim episode content. v0.5.0+.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "parameters": [
                        { "name": "cluster_id", "in": "path", "required": true, "schema": { "type": "string", "minLength": 1 }, "description": "Cluster id (from a previous GET /memory/themes response)." },
                        { "name": "full_content", "in": "query", "required": false, "schema": { "type": "boolean", "default": false }, "description": "If true, return episode content verbatim. Default false (truncate to 200 chars + ellipsis)." }
                    ],
                    "responses": {
                        "200": {
                            "description": "Cluster snapshot.",
                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ClusterRecord" } } }
                        },
                        "400": { "description": "Bad request (e.g. empty cluster_id).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "404": { "description": "No such cluster.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                }
            },
            "/memory/documents": {
                "post": {
                    "summary": "Ingest a document",
                    "description":
                        "Equivalent to MCP tool `memory_ingest_document`. Reads the file at the \
                         supplied server-side path, parses + chunks + embeds, and persists under \
                         `documents` + `document_chunks`. Returns the new doc_id, chunk count, and \
                         a `deduped` flag (true when an existing document with the same content_hash \
                         was returned without re-embedding). v0.7.0+.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "requestBody": {
                        "required": true,
                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/IngestDocumentRequest" } } }
                    },
                    "responses": {
                        "200": {
                            "description": "Document ingested (or deduplicated).",
                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/IngestReport" } } }
                        },
                        "400": { "description": "Bad request (e.g. empty path, file unreadable, parse error).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                },
                "get": {
                    "summary": "List ingested documents (paginated)",
                    "description":
                        "Equivalent to MCP tool `memory_list_documents`. Returns a paginated index, \
                         newest first. Forgotten documents are hidden by default; pass \
                         `?include_forgotten=true` to see them too. v0.7.0+.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "parameters": [
                        { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 1, "maximum": 100, "default": 20 } },
                        { "name": "offset", "in": "query", "required": false, "schema": { "type": "integer", "minimum": 0, "default": 0 } },
                        { "name": "include_forgotten", "in": "query", "required": false, "schema": { "type": "boolean", "default": false } }
                    ],
                    "responses": {
                        "200": {
                            "description": "Array of DocumentSummary (possibly empty).",
                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/DocumentSummary" } } } }
                        },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                }
            },
            "/memory/documents/search": {
                "post": {
                    "summary": "Vector search across document chunks",
                    "description":
                        "Equivalent to MCP tool `memory_search_docs`. Embeds the query and returns \
                         up to `limit` matching chunks, best match first, each annotated with the \
                         parent document's title + source path. Forgotten documents are excluded. \
                         v0.7.0+.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "requestBody": {
                        "required": true,
                        "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SearchDocsRequest" } } }
                    },
                    "responses": {
                        "200": {
                            "description": "Array of DocSearchHits (possibly empty).",
                            "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/DocSearchHit" } } } }
                        },
                        "400": { "description": "Bad request (e.g. empty query).", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                }
            },
            "/memory/documents/{id}": {
                "get": {
                    "summary": "Inspect one document",
                    "description":
                        "Equivalent to MCP tool `memory_inspect_document`. Returns the document's \
                         metadata plus a preview of every chunk (truncated to 200 chars). v0.7.0+.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "parameters": [
                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "DocumentId (UUID v7)." }
                    ],
                    "responses": {
                        "200": {
                            "description": "Document inspection result.",
                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/DocumentInspectResult" } } }
                        },
                        "400": { "description": "Malformed id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "404": { "description": "No such document.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                },
                "delete": {
                    "summary": "Forget (soft-delete) one document",
                    "description":
                        "Equivalent to MCP tool `memory_forget_document`. Flips `documents.status` \
                         to `forgotten` and tombstones every chunk's HNSW rowid. The chunk rows \
                         survive in SQL for forensic value. v0.7.0+.",
                    "security": [{ "bearerAuth": [] }, {}],
                    "parameters": [
                        { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } }
                    ],
                    "responses": {
                        "200": {
                            "description": "Document soft-deleted; report counts chunks tombstoned.",
                            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ForgetDocumentReport" } } }
                        },
                        "400": { "description": "Malformed id.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "404": { "description": "No such document.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ApiError" } } } },
                        "401": { "description": "Missing or invalid bearer token (LAN-bound deployments only)." }
                    }
                }
            }
        }
    })
}

// ---------------------------------------------------------------------------
// Handlers
// ---------------------------------------------------------------------------

/// JSON request body for `POST /memory` (remember).
#[derive(Debug, Deserialize)]
struct RememberBody {
    /// Text of the episode to store. The handler strips trailing
    /// whitespace and answers with a bad-request error if nothing remains.
    content: String,
    /// Optional origin label. The handler substitutes `"user_message"`
    /// when it is omitted.
    #[serde(default)]
    source_type: Option<String>,
    /// Optional caller-supplied identifier, copied onto the episode as-is.
    #[serde(default)]
    source_id: Option<String>,
}

/// JSON response for a successful `POST /memory`.
#[derive(Debug, Serialize)]
struct RememberResponse {
    /// String form of the id the writer returned for the stored episode.
    memory_id: String,
}

async fn remember_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Json(body): Json<RememberBody>,
) -> Result<Json<RememberResponse>, ApiError> {
    let content = body.content.trim_end().to_string();
    if content.is_empty() {
        return Err(ApiError::bad_request("content must not be empty"));
    }
    let embedding = tenant.embedder().embed(&content).await.map_err(ApiError::from)?;
    let episode = Episode {
        memory_id: MemoryId::new(),
        ts_ms: chrono::Utc::now().timestamp_millis(),
        source_type: body.source_type.unwrap_or_else(|| "user_message".into()),
        source_id: body.source_id,
        content,
        encoding_context: EncodingContext::default(),
        provenance: None,
        confidence: Confidence::new(0.9).unwrap(),
        strength: 0.5,
        salience: 0.5,
        tier: Tier::Hot,
    };
    let mid = tenant
        .write()
        .remember_as(principal, episode, embedding)
        .await
        .map_err(ApiError::from)?;
    Ok(Json(RememberResponse {
        memory_id: mid.to_string(),
    }))
}

/// JSON request body for `POST /memory/search` (recall).
#[derive(Debug, Deserialize)]
struct RecallBody {
    /// Search text; passed unchanged to `solo_query::run_recall`.
    query: String,
    /// Requested number of results; falls back to `default_limit()`
    /// when the field is absent.
    #[serde(default = "default_limit")]
    limit: usize,
}

/// Serde fallback for the `limit` field of the request types that name
/// this function in `#[serde(default = "default_limit")]`.
fn default_limit() -> usize {
    const FALLBACK_LIMIT: usize = 5;
    FALLBACK_LIMIT
}

async fn recall_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Json(body): Json<RecallBody>,
) -> Result<Json<solo_query::RecallResult>, ApiError> {
    // solo_query::run_recall handles empty-query rejection (returns
    // InvalidInput → ApiError::bad_request(400)) and clamps limit
    // upstream of the embedder call.
    let result = solo_query::run_recall(tenant.as_ref(), principal, &body.query, body.limit)
        .await
        .map_err(ApiError::from)?;
    Ok(Json(result))
}

async fn inspect_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Path(id): Path<String>,
) -> Result<Json<solo_query::EpisodeRecord>, ApiError> {
    let mid = MemoryId::from_str(&id)
        .map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
    let row = solo_query::inspect_one(tenant.read(), tenant.audit(), principal, mid)
        .await
        .map_err(ApiError::from)?;
    Ok(Json(row))
}

// Path 1 derived-layer handlers (v0.4.0+). All three are GET-shaped:
// pure read-only queries against the Steward's outputs, query-string
// params for simple filters. Each handler delegates to a single
// solo_query::derived pipeline and returns the result Vec as JSON.
// Empty derived layer → 200 with `[]` body (parseable JSON array).

/// Query-string parameters for `GET /memory/themes`.
#[derive(Debug, Deserialize)]
struct ThemesQuery {
    /// Optional look-back window, forwarded to `solo_query::themes`
    /// unchanged; `None` when the parameter is absent.
    /// NOTE(review): presumably a number of days, per the name — confirm
    /// how the pipeline interprets `None` and non-positive values.
    #[serde(default)]
    window_days: Option<i64>,
    /// Requested number of results; falls back to `default_limit()`.
    #[serde(default = "default_limit")]
    limit: usize,
}

async fn themes_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Query(q): Query<ThemesQuery>,
) -> Result<Json<Vec<solo_query::ThemeHit>>, ApiError> {
    let hits = solo_query::themes(
        tenant.read(),
        tenant.audit(),
        principal,
        q.window_days,
        q.limit,
    )
    .await
    .map_err(ApiError::from)?;
    Ok(Json(hits))
}

/// Query-string parameters for `GET /memory/facts_about`.
#[derive(Debug, Deserialize)]
struct FactsAboutQuery {
    /// Subject to look up. Required; the handler rejects values that are
    /// empty or whitespace-only.
    subject: String,
    /// Optional predicate filter, forwarded to `solo_query::facts_about`.
    #[serde(default)]
    predicate: Option<String>,
    /// Optional lower time bound, forwarded unchanged.
    /// NOTE(review): presumably epoch milliseconds — confirm inclusivity.
    #[serde(default)]
    since_ms: Option<i64>,
    /// Optional upper time bound, forwarded unchanged.
    /// NOTE(review): presumably epoch milliseconds — confirm inclusivity.
    #[serde(default)]
    until_ms: Option<i64>,
    /// v0.5.1 Priority 8 — widen the query to also match rows where
    /// `subject` appears as the object. Default `false`.
    #[serde(default)]
    include_as_object: bool,
    /// Requested number of results; falls back to `default_limit()`.
    #[serde(default = "default_limit")]
    limit: usize,
}

async fn facts_about_handler(
    State(s): State<SoloHttpState>,
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Query(q): Query<FactsAboutQuery>,
) -> Result<Json<Vec<solo_query::FactHit>>, ApiError> {
    if q.subject.trim().is_empty() {
        return Err(ApiError::bad_request("subject must not be empty"));
    }
    let hits = solo_query::facts_about(
        tenant.read(),
        tenant.audit(),
        principal,
        &q.subject,
        &s.user_aliases,
        q.include_as_object,
        q.predicate.as_deref(),
        q.since_ms,
        q.until_ms,
        q.limit,
    )
    .await
    .map_err(ApiError::from)?;
    Ok(Json(hits))
}

/// Query-string parameters for `GET /memory/contradictions`.
#[derive(Debug, Deserialize)]
struct ContradictionsQuery {
    /// Requested number of results; falls back to `default_limit()`.
    #[serde(default = "default_limit")]
    limit: usize,
}

async fn contradictions_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Query(q): Query<ContradictionsQuery>,
) -> Result<Json<Vec<solo_query::ContradictionHit>>, ApiError> {
    let hits = solo_query::contradictions(tenant.read(), tenant.audit(), principal, q.limit)
        .await
        .map_err(ApiError::from)?;
    Ok(Json(hits))
}

/// Query-string parameters for `GET /memory/clusters/{cluster_id}`.
#[derive(Debug, Deserialize, Default)]
struct InspectClusterQuery {
    /// Default `false` — episode `content` is truncated to
    /// `solo_query::EPISODE_TRUNCATE_CHARS` chars with a trailing `…`.
    /// `?full_content=true` returns each episode's content verbatim.
    #[serde(default)]
    full_content: bool,
}

async fn inspect_cluster_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Path(cluster_id): Path<String>,
    Query(q): Query<InspectClusterQuery>,
) -> Result<Json<solo_query::ClusterRecord>, ApiError> {
    if cluster_id.trim().is_empty() {
        return Err(ApiError::bad_request("cluster_id must not be empty"));
    }
    let record = solo_query::inspect_cluster(
        tenant.read(),
        tenant.audit(),
        principal,
        &cluster_id,
        q.full_content,
    )
    .await
    .map_err(ApiError::from)?;
    Ok(Json(record))
}

// ---------------------------------------------------------------------------
// Document handlers (v0.7.0 P6)
// ---------------------------------------------------------------------------

/// JSON request body for `POST /memory/documents` (ingest a file).
#[derive(Debug, Deserialize)]
struct IngestDocumentBody {
    /// Server-side absolute path to the file. Must be readable by the
    /// Solo process. The writer reads, parses, chunks, and embeds.
    /// The handler itself only rejects empty / whitespace-only values.
    path: String,
}

async fn ingest_document_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Json(body): Json<IngestDocumentBody>,
) -> Result<Json<solo_storage::IngestReport>, ApiError> {
    if body.path.trim().is_empty() {
        return Err(ApiError::bad_request("path must not be empty"));
    }
    let path = std::path::PathBuf::from(body.path);
    let chunk_config = solo_storage::document::ChunkConfig::default();
    let report = tenant
        .write()
        .ingest_document_as(principal, path, chunk_config)
        .await
        .map_err(ApiError::from)?;
    Ok(Json(report))
}

/// JSON request body for `POST /memory/documents/search`.
#[derive(Debug, Deserialize)]
struct SearchDocsBody {
    /// Search text; passed unchanged to `solo_query::run_doc_search`.
    query: String,
    /// Requested number of results; falls back to `default_limit()`.
    #[serde(default = "default_limit")]
    limit: usize,
}

async fn search_docs_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Json(body): Json<SearchDocsBody>,
) -> Result<Json<Vec<solo_query::DocSearchHit>>, ApiError> {
    let hits = solo_query::run_doc_search(tenant.as_ref(), principal, &body.query, body.limit)
        .await
        .map_err(ApiError::from)?;
    Ok(Json(hits))
}

async fn inspect_document_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Path(id): Path<String>,
) -> Result<Json<solo_query::DocumentInspectResult>, ApiError> {
    let doc_id = DocumentId::from_str(&id)
        .map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
    let result_opt =
        solo_query::inspect_document(tenant.read(), tenant.audit(), principal, &doc_id)
            .await
            .map_err(ApiError::from)?;
    match result_opt {
        Some(record) => Ok(Json(record)),
        None => Err(ApiError::not_found(format!("document {doc_id} not found"))),
    }
}

/// Query-string parameters for `GET /memory/documents`.
#[derive(Debug, Deserialize)]
struct ListDocumentsQuery {
    /// Page size; falls back to `default_list_documents_limit()`.
    #[serde(default = "default_list_documents_limit")]
    limit: usize,
    /// Number of rows to skip; `0` when absent.
    #[serde(default)]
    offset: usize,
    /// Whether forgotten documents are listed too; `false` when absent.
    #[serde(default)]
    include_forgotten: bool,
}

/// Serde fallback for `ListDocumentsQuery::limit`.
fn default_list_documents_limit() -> usize {
    const FALLBACK_PAGE_SIZE: usize = 20;
    FALLBACK_PAGE_SIZE
}

async fn list_documents_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Query(q): Query<ListDocumentsQuery>,
) -> Result<Json<Vec<solo_query::DocumentSummary>>, ApiError> {
    let rows = solo_query::list_documents(
        tenant.read(),
        tenant.audit(),
        principal,
        q.limit,
        q.offset,
        q.include_forgotten,
    )
    .await
    .map_err(ApiError::from)?;
    Ok(Json(rows))
}

async fn forget_document_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Path(id): Path<String>,
) -> Result<Json<solo_storage::ForgetDocumentReport>, ApiError> {
    let doc_id = DocumentId::from_str(&id)
        .map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
    let report = tenant
        .write()
        .forget_document_as(principal, doc_id)
        .await
        .map_err(ApiError::from)?;
    Ok(Json(report))
}

/// Query-string parameters for `DELETE /memory/{id}`.
#[derive(Debug, Deserialize)]
struct ForgetQuery {
    /// Optional free-text reason passed to the writer; the handler uses
    /// `"http"` when it is absent.
    #[serde(default)]
    reason: Option<String>,
}

async fn forget_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    Path(id): Path<String>,
    Query(q): Query<ForgetQuery>,
) -> Result<StatusCode, ApiError> {
    let mid = MemoryId::from_str(&id).map_err(|e| ApiError::bad_request(format!("invalid id: {e}")))?;
    let reason = q.reason.unwrap_or_else(|| "http".into());
    tenant
        .write()
        .forget_as(principal, mid, reason)
        .await
        .map_err(ApiError::from)?;
    Ok(StatusCode::NO_CONTENT)
}

async fn consolidate_handler(
    TenantExtractor(tenant): TenantExtractor,
    AuditPrincipal(principal): AuditPrincipal,
    body: axum::body::Bytes,
) -> Result<Json<solo_storage::ConsolidationReport>, ApiError> {
    // Empty body = default scope (unbounded window). We parse via
    // `Bytes` rather than `Option<Json<T>>` because axum's `Json`
    // extractor 400s on an empty body when Content-Type is JSON
    // (it can't deserialize zero bytes as `T`), and the `Option`
    // wrapper doesn't reliably degrade that failure to `None`.
    let scope = if body.is_empty() {
        solo_storage::ConsolidationScope::default()
    } else {
        serde_json::from_slice(&body)
            .map_err(|e| ApiError::bad_request(format!("invalid JSON: {e}")))?
    };
    let report = tenant
        .write()
        .consolidate_as(principal, scope)
        .await
        .map_err(ApiError::from)?;
    Ok(Json(report))
}

/// JSON request body for `POST /backup`.
#[derive(Debug, Deserialize)]
struct BackupBody {
    /// Server-side absolute path where the backup file should be
    /// written. Must be writable by the Solo process. Refuses to
    /// overwrite an existing file unless `force = true`.
    to: String,
    /// When `true`, an existing file at `to` is removed before the backup
    /// runs. Defaults to `false`.
    #[serde(default)]
    force: bool,
}

/// JSON response for a successful `POST /backup`.
#[derive(Debug, Serialize)]
struct BackupResponse {
    /// Destination path as rendered by `Path::display`.
    path: String,
    /// Time spent inside the writer's `backup` call, in milliseconds.
    elapsed_ms: u64,
}

/// `POST /backup` — write a backup of the tenant database to a
/// server-side path.
///
/// Checks run in a fixed order before the writer is invoked:
/// 1. `to` must be non-empty.
/// 2. The destination must not be the live database file itself.
/// 3. An existing destination is removed only when `force` is set.
/// 4. The destination's parent directory must already exist.
///
/// Responds with the destination path and the time the `backup` call took.
///
/// # Errors
///
/// Bad request for any failed check above; internal error when an
/// existing destination cannot be removed; writer failures are converted
/// through `ApiError::from`.
async fn backup_handler(
    TenantExtractor(tenant): TenantExtractor,
    Json(body): Json<BackupBody>,
) -> Result<Json<BackupResponse>, ApiError> {
    use std::path::PathBuf;

    let dest = PathBuf::from(&body.to);
    if dest.as_os_str().is_empty() {
        return Err(ApiError::bad_request("`to` must not be empty"));
    }
    // CRITICAL ORDER: same-file refusal MUST come BEFORE `remove_file`.
    // The tenant's source DB path comes from the resolved TenantHandle.
    if solo_storage::paths_refer_to_same_file(tenant.db_path(), &dest) {
        return Err(ApiError::bad_request(format!(
            "destination {} is the same file as the source database; \
             refusing to run (would corrupt the live database)",
            dest.display()
        )));
    }
    // Overwrite handling: without `force` an existing file is an error;
    // with `force` it is deleted up front, before the backup starts.
    if dest.exists() {
        if !body.force {
            return Err(ApiError::bad_request(format!(
                "destination {} exists; pass force=true to overwrite",
                dest.display()
            )));
        }
        std::fs::remove_file(&dest).map_err(|e| {
            ApiError::internal(format!(
                "remove existing destination {}: {e}",
                dest.display()
            ))
        })?;
    }
    // A bare file name has an empty parent; that case is allowed through
    // and only a non-empty parent that is not a directory is rejected.
    if let Some(parent) = dest.parent() {
        if !parent.as_os_str().is_empty() && !parent.is_dir() {
            return Err(ApiError::bad_request(format!(
                "destination parent directory {} does not exist",
                parent.display()
            )));
        }
    }

    // Only the writer call is timed; the validation above is excluded.
    let started = std::time::Instant::now();
    tenant.write().backup(dest.clone()).await.map_err(ApiError::from)?;
    let elapsed_ms = started.elapsed().as_millis() as u64;

    Ok(Json(BackupResponse {
        path: dest.display().to_string(),
        elapsed_ms,
    }))
}

// ---------------------------------------------------------------------------
// Graph expand (v0.9.x — first /v1/graph/* endpoint for solo-web)
// ---------------------------------------------------------------------------
//
// `GET /v1/graph/expand?node_id=...&kind=...&limit=N` — read-only neighbor
// drill off any node. Supports four edge kinds:
//   * `cluster_member` — episodes ↔ clusters via `cluster_episodes`.
//   * `document_chunk` — documents ↔ chunks via `document_chunks.doc_id`.
//   * `triple`         — episodes ↔ entities via `triples` (subject_id /
//     object_id / source_episode_id added in migration 0007).
//   * `semantic`       — HNSW top-K similar episodes (re-embeds the source
//     episode's content via the tenant embedder, then calls the same
//     pipeline as `/memory/search`; cheaper than a separate embeddings-
//     table fetch path and reuses one well-tested code path).
//
// **Node-id prefix convention** (locked in this PR; the future
// `/v1/graph/nodes` + `/v1/graph/inspect/:id` endpoints will use the
// same scheme):
//   * `ep:<memory_id>`     — episode (memory_id = UUID v7)
//   * `doc:<doc_id>`       — document (doc_id   = UUID v7)
//   * `chunk:<chunk_id>`   — chunk    (chunk_id = UUID v7)
//   * `cl:<cluster_id>`    — cluster
//   * `ent:<value>`        — entity (synthetic — minted from a triple's
//     subject_id / object_id; value is the raw string verbatim, no
//     URL-encoding — `:` and other punctuation appear in real entity
//     ids in the wild).
//
// Entity nodes are synthetic: there's no `entities` table. They're derived
// on-the-fly from triples and only exist in the wire format. Two entity
// nodes with the same `ent:<value>` are the same node.
//
// **Read-only**: no audit emit (lesson #30 — graph expand is a derived view
// over already-audited primitives; the explicit-query audit events from
// `memory.recall` / `memory.inspect` / `memory.facts_about` cover the
// underlying reads).
//
// Tests live inline in `handler_tests` below.

/// Neighbor count used by `GET /v1/graph/expand` when `limit` is omitted.
const GRAPH_EXPAND_DEFAULT_LIMIT: u32 = 25;
/// Upper bound for `limit`; larger requests are silently clamped to this.
const GRAPH_EXPAND_MAX_LIMIT: u32 = 100;

/// Edge-kind discriminator. Drives which expansion path runs and what edge
/// kind appears in the response.
///
/// Deserialised from the `kind` query parameter in snake_case
/// (`cluster_member`, `document_chunk`, `triple`, `semantic`).
#[derive(Debug, Clone, Copy, Deserialize)]
#[serde(rename_all = "snake_case")]
enum GraphExpandKind {
    /// Episodes ↔ clusters via `cluster_episodes`.
    ClusterMember,
    /// Documents ↔ chunks via `document_chunks.doc_id`.
    DocumentChunk,
    /// Episodes ↔ entities via `triples`.
    Triple,
    /// HNSW top-K similar episodes.
    Semantic,
}

/// Query-string parameters for `GET /v1/graph/expand`.
#[derive(Debug, Deserialize)]
struct GraphExpandQuery {
    /// Source node in `<prefix>:<value>` form (`ep:`, `doc:`, `chunk:`,
    /// `cl:` or `ent:`); decoded by `parse_node_id`.
    node_id: String,
    /// Which kind of edge to follow from the source node.
    kind: GraphExpandKind,
    /// Optional neighbor cap. The handler falls back to
    /// `GRAPH_EXPAND_DEFAULT_LIMIT` and clamps the value to
    /// `1..=GRAPH_EXPAND_MAX_LIMIT`.
    #[serde(default)]
    limit: Option<u32>,
}

/// Kind of a graph node, as determined by the prefix of its `node_id`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NodeKind {
    Episode,
    Document,
    Chunk,
    Cluster,
    Entity,
}

impl NodeKind {
    /// Name of this kind as it appears in the `kind` field of a serialised
    /// graph node.
    fn as_wire_str(self) -> &'static str {
        use NodeKind::{Chunk, Cluster, Document, Entity, Episode};

        let wire_name = match self {
            Episode => "episode",
            Document => "document",
            Chunk => "chunk",
            Cluster => "cluster",
            Entity => "entity",
        };
        wire_name
    }
}

/// Split a `<prefix>:<value>` node id into its kind and raw value.
///
/// Only the first `:` separates prefix from value, so values that contain
/// further colons (as entity ids may) are returned intact.
///
/// # Errors
///
/// Bad request, checked in this order: no `:` present, empty value after
/// the prefix, unrecognised prefix.
fn parse_node_id(raw: &str) -> Result<(NodeKind, &str), ApiError> {
    let (prefix, value) = match raw.split_once(':') {
        Some(parts) => parts,
        None => {
            return Err(ApiError::bad_request(format!(
                "node_id must be `<prefix>:<value>` (one of ep:/doc:/chunk:/cl:/ent:); got {raw:?}"
            )));
        }
    };

    // Emptiness is reported before the prefix is looked at, so `zz:` is an
    // "empty value" error rather than an "unknown prefix" one.
    if value.is_empty() {
        return Err(ApiError::bad_request(format!(
            "node_id value is empty after prefix: {raw:?}"
        )));
    }

    match prefix {
        "ep" => Ok((NodeKind::Episode, value)),
        "doc" => Ok((NodeKind::Document, value)),
        "chunk" => Ok((NodeKind::Chunk, value)),
        "cl" => Ok((NodeKind::Cluster, value)),
        "ent" => Ok((NodeKind::Entity, value)),
        other => Err(ApiError::bad_request(format!(
            "unknown node_id prefix {other:?}; expected one of ep:/doc:/chunk:/cl:/ent:"
        ))),
    }
}

/// One node in the graph-expand response. Mirrors solo-web's `GraphNode`
/// TS interface (see `solo-web/src/api/types.ts`).
#[derive(Debug, Serialize)]
struct GraphNode {
    /// Prefixed node id (`ep:…`, `doc:…`, `chunk:…`, `cl:…`, `ent:…`).
    id: String,
    /// Wire name of the node kind, from `NodeKind::as_wire_str`.
    kind: &'static str,
    /// Short display heading for the node.
    label: String,
    /// Timestamp in milliseconds when the node has one; left out of the
    /// JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    ts_ms: Option<i64>,
    /// String form of the tenant the node was read from.
    tenant_id: String,
    /// Longer excerpt for the node; left out of the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    preview: Option<String>,
}

/// One edge. Mirrors `GraphEdge` in solo-web TS types. `id` is a composite
/// `${source}--${kind}--${target}` so the renderer can dedupe.
#[derive(Debug, Serialize)]
struct GraphEdge {
    /// Composite edge id (see the type-level note).
    id: String,
    /// Node id the edge starts from.
    source: String,
    /// Node id the edge points to.
    target: String,
    /// Edge kind as a wire string.
    kind: &'static str,
    /// Optional predicate text; left out of the JSON when `None`.
    /// NOTE(review): presumably only set for `triple` edges — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    predicate: Option<String>,
    /// Optional edge weight; left out of the JSON when `None`.
    /// NOTE(review): presumably a similarity score for `semantic` edges —
    /// confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    weight: Option<f32>,
}

/// JSON response of `GET /v1/graph/expand`: the nodes and edges found for
/// the requested expansion.
#[derive(Debug, Serialize)]
struct GraphExpandResponse {
    /// Nodes in the expansion result.
    nodes: Vec<GraphNode>,
    /// Edges in the expansion result.
    edges: Vec<GraphEdge>,
}

/// Build the composite edge id `<source>--<kind>--<target>`.
fn edge_id(source: &str, kind: &str, target: &str) -> String {
    [source, kind, target].join("--")
}

/// Episode summary needed to mint a `GraphNode` from an episode row.
#[derive(Debug)]
struct ExpandedEpisode {
    /// Episode id without the `ep:` prefix.
    memory_id: String,
    /// Episode timestamp in milliseconds; becomes the node's `ts_ms`.
    ts_ms: i64,
    /// Full episode text; source of the node's label and preview.
    content: String,
}

/// Document summary needed to mint a `GraphNode` from a document row.
#[derive(Debug)]
struct ExpandedDocument {
    /// Document id without the `doc:` prefix; last-resort label.
    doc_id: String,
    /// Optional title; preferred label when present.
    title: Option<String>,
    /// Optional source string; fallback label and the node's preview.
    source: Option<String>,
    /// Ingestion timestamp in milliseconds; becomes the node's `ts_ms`.
    ingested_at_ms: i64,
}

/// Chunk summary needed to mint a `GraphNode` from a chunk row.
#[derive(Debug)]
struct ExpandedChunk {
    /// Chunk id without the `chunk:` prefix.
    chunk_id: String,
    /// Index of the chunk; shown in the node label as `chunk #<index>`.
    chunk_index: i64,
    /// Full chunk text; source of the node's label and preview.
    content: String,
}

/// Shorten `s` to at most `max` characters for display.
///
/// Lengths are counted in Unicode scalar values (`char`s), not bytes, so
/// multi-byte text is never split mid-character.
///
/// * `s` with `max` or fewer characters is returned unchanged.
/// * Otherwise the first `max - 1` characters are kept and `…` is
///   appended, giving exactly `max` characters.
/// * `max == 0` yields an empty string. There is no room even for the
///   ellipsis, and the subtraction below would otherwise underflow.
fn truncate_preview(s: &str, max: usize) -> String {
    if max == 0 {
        return String::new();
    }
    // `nth(max)` looks for a character *beyond* the budget. It stops as
    // soon as it finds one, so long inputs are not scanned to the end.
    if s.chars().nth(max).is_none() {
        return s.to_string();
    }
    let mut out: String = s.chars().take(max - 1).collect();
    out.push('…');
    out
}

/// First-line label cap. Keeps payloads tight for the graph renderer
/// (labels are headings, not full content).
const GRAPH_LABEL_CHARS: usize = 80;
/// Character cap applied to the `preview` text of episode, chunk and
/// cluster nodes.
const GRAPH_PREVIEW_CHARS: usize = 200;

/// Derive a node label from free text: its first line, capped at
/// `GRAPH_LABEL_CHARS` characters.
fn episode_label(content: &str) -> String {
    let heading = match content.lines().next() {
        Some(line) => line,
        // `lines()` yields nothing for an empty string.
        None => content,
    };
    truncate_preview(heading, GRAPH_LABEL_CHARS)
}

/// Build the `ep:<memory_id>` graph node for an episode.
///
/// The label is the first line of the content and the preview is the
/// content capped at `GRAPH_PREVIEW_CHARS` characters.
fn graph_node_for_episode(tenant_id: &str, ep: &ExpandedEpisode) -> GraphNode {
    let id = format!("ep:{}", ep.memory_id);
    let label = episode_label(&ep.content);
    let preview = truncate_preview(&ep.content, GRAPH_PREVIEW_CHARS);

    GraphNode {
        id,
        kind: NodeKind::Episode.as_wire_str(),
        label,
        ts_ms: Some(ep.ts_ms),
        tenant_id: tenant_id.to_owned(),
        preview: Some(preview),
    }
}

/// Build the `doc:<doc_id>` graph node for a document.
///
/// The label is the title when present, otherwise the source, otherwise
/// the bare document id, capped at `GRAPH_LABEL_CHARS` characters. The
/// preview is the source string, uncapped.
fn graph_node_for_document(tenant_id: &str, d: &ExpandedDocument) -> GraphNode {
    let label_source: &str = d
        .title
        .as_deref()
        .or(d.source.as_deref())
        .unwrap_or(d.doc_id.as_str());

    GraphNode {
        id: format!("doc:{}", d.doc_id),
        kind: NodeKind::Document.as_wire_str(),
        label: truncate_preview(label_source, GRAPH_LABEL_CHARS),
        ts_ms: Some(d.ingested_at_ms),
        tenant_id: tenant_id.to_owned(),
        preview: d.source.clone(),
    }
}

/// Build the `chunk:<chunk_id>` graph node for a document chunk.
///
/// The label is `chunk #<index>: <first line>`. Only the first-line part
/// is capped, so the full label can exceed `GRAPH_LABEL_CHARS`. Chunk
/// nodes carry no timestamp.
fn graph_node_for_chunk(tenant_id: &str, c: &ExpandedChunk) -> GraphNode {
    let heading = episode_label(&c.content);
    let label = format!("chunk #{}: {}", c.chunk_index, heading);
    let preview = truncate_preview(&c.content, GRAPH_PREVIEW_CHARS);

    GraphNode {
        id: format!("chunk:{}", c.chunk_id),
        kind: NodeKind::Chunk.as_wire_str(),
        label,
        ts_ms: None,
        tenant_id: tenant_id.to_owned(),
        preview: Some(preview),
    }
}

/// Build the `cl:<cluster_id>` graph node for a cluster.
///
/// With an abstraction, label and preview are both that text, capped at
/// `GRAPH_LABEL_CHARS` and `GRAPH_PREVIEW_CHARS` respectively. Without
/// one, the label is `cluster <cluster_id>` and there is no preview.
fn graph_node_for_cluster(
    tenant_id: &str,
    cluster_id: &str,
    abstraction: Option<&str>,
    created_at_ms: i64,
) -> GraphNode {
    let (label, preview) = match abstraction {
        Some(text) => (
            truncate_preview(text, GRAPH_LABEL_CHARS),
            Some(truncate_preview(text, GRAPH_PREVIEW_CHARS)),
        ),
        None => (format!("cluster {cluster_id}"), None),
    };

    GraphNode {
        id: format!("cl:{cluster_id}"),
        kind: NodeKind::Cluster.as_wire_str(),
        label,
        ts_ms: Some(created_at_ms),
        tenant_id: tenant_id.to_owned(),
        preview,
    }
}

/// Build the `ent:<value>` graph node for an entity.
///
/// The raw value goes into the id verbatim; the label is the same value
/// capped at `GRAPH_LABEL_CHARS` characters. Entity nodes have neither
/// timestamp nor preview.
fn graph_node_for_entity(tenant_id: &str, value: &str) -> GraphNode {
    let id = format!("ent:{value}");
    let label = truncate_preview(value, GRAPH_LABEL_CHARS);

    GraphNode {
        id,
        kind: NodeKind::Entity.as_wire_str(),
        label,
        ts_ms: None,
        tenant_id: tenant_id.to_owned(),
        preview: None,
    }
}

/// `GET /v1/graph/expand` — return the neighbors of one node along the
/// requested edge kind.
///
/// `limit` falls back to `GRAPH_EXPAND_DEFAULT_LIMIT` and is silently
/// clamped to `1..=GRAPH_EXPAND_MAX_LIMIT`. The node id is decoded with
/// `parse_node_id`, then the request is dispatched on `kind` to the
/// matching `expand_*` function.
///
/// # Errors
///
/// Bad request for a malformed `node_id`; anything else comes from the
/// selected expansion function.
async fn graph_expand_handler(
    TenantExtractor(tenant): TenantExtractor,
    Query(q): Query<GraphExpandQuery>,
) -> Result<Json<GraphExpandResponse>, ApiError> {
    // Out-of-range limits are clamped rather than rejected.
    let requested = q.limit.unwrap_or(GRAPH_EXPAND_DEFAULT_LIMIT);
    let limit = i64::from(requested.clamp(1, GRAPH_EXPAND_MAX_LIMIT));

    let (node_kind, value) = parse_node_id(&q.node_id)?;
    let node_id_full = q.node_id.as_str();
    let tenant_id = tenant.tenant_id().to_string();

    let expanded = match q.kind {
        GraphExpandKind::ClusterMember => {
            expand_cluster_member(&tenant, &tenant_id, node_kind, value, node_id_full, limit).await
        }
        GraphExpandKind::DocumentChunk => {
            expand_document_chunk(&tenant, &tenant_id, node_kind, value, node_id_full, limit).await
        }
        GraphExpandKind::Triple => {
            expand_triple(&tenant, &tenant_id, node_kind, value, node_id_full, limit).await
        }
        GraphExpandKind::Semantic => {
            expand_semantic(&tenant, &tenant_id, node_kind, value, node_id_full, limit).await
        }
    };

    expanded.map(Json)
}

// ---- cluster_member ----

/// Dispatch a `cluster_member` expansion on the source node's kind.
///
/// Episode sources go to `expand_cluster_member_from_episode` and cluster
/// sources to `expand_cluster_member_from_cluster`.
///
/// # Errors
///
/// Bad request for document, chunk and entity source nodes; otherwise
/// whatever the selected expansion returns.
async fn expand_cluster_member(
    tenant: &TenantHandle,
    tenant_id: &str,
    node_kind: NodeKind,
    value: &str,
    node_id_full: &str,
    limit: i64,
) -> Result<GraphExpandResponse, ApiError> {
    match node_kind {
        NodeKind::Episode => {
            let memory_id = String::from(value);
            let source_node = String::from(node_id_full);
            expand_cluster_member_from_episode(tenant, tenant_id, memory_id, source_node, limit)
                .await
        }
        NodeKind::Cluster => {
            let cluster_id = String::from(value);
            let source_node = String::from(node_id_full);
            expand_cluster_member_from_cluster(tenant, tenant_id, cluster_id, source_node, limit)
                .await
        }
        // Unsupported kinds are listed one by one so that a newly added
        // `NodeKind` variant becomes a compile error here.
        unsupported @ (NodeKind::Document | NodeKind::Chunk | NodeKind::Entity) => {
            Err(ApiError::bad_request(format!(
                "kind=cluster_member only valid for episode or cluster source nodes; got {}",
                unsupported.as_wire_str()
            )))
        }
    }
}

async fn expand_cluster_member_from_episode(
    tenant: &TenantHandle,
    tenant_id: &str,
    memory_id: String,
    node_id_full: String,
    limit: i64,
) -> Result<GraphExpandResponse, ApiError> {
    let memory_id_for_err = memory_id.clone();
    let rows: Vec<(String, Option<String>, i64)> = tenant
        .read()
        .interact(move |conn| {
            // First confirm the source episode exists in this tenant.
            let exists: i64 = conn.query_row(
                "SELECT COUNT(*) FROM episodes WHERE memory_id = ?1",
                rusqlite::params![&memory_id],
                |r| r.get(0),
            )?;
            if exists == 0 {
                return Ok(Vec::new());
            }
            let mut stmt = conn.prepare(
                "SELECT c.cluster_id, sa.content, c.created_at_ms
                   FROM cluster_episodes ce
                   JOIN clusters c ON c.cluster_id = ce.cluster_id
                   LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
                  WHERE ce.memory_id = ?1
                  ORDER BY c.created_at_ms DESC
                  LIMIT ?2",
            )?;
            let mapped = stmt
                .query_map(rusqlite::params![&memory_id, limit], |r| {
                    Ok((
                        r.get::<_, String>(0)?,
                        r.get::<_, Option<String>>(1)?,
                        r.get::<_, i64>(2)?,
                    ))
                })?
                .collect::<rusqlite::Result<Vec<_>>>()?;
            // Marker tuple to signal "episode found" via Vec emptiness +
            // an extra sentinel; we use a different shape:
            // pack the "found" flag via an out-of-band trick — actually
            // we re-query above. Keep it simple: confirm again here by
            // returning the rows; a missing episode short-circuits to
            // a 404 below via the `exists == 0` guard.
            Ok::<_, rusqlite::Error>(mapped)
        })
        .await
        .map_err(ApiError::from)?;

    // The interact() returns Vec<(...)>; but we need to distinguish "no
    // such episode" (→ 404) from "episode exists, has no clusters" (→
    // 200 with empty arrays). Re-run a cheap existence check separately
    // — we already inlined it above and returned `Vec::new()` on miss,
    // but a real miss is indistinguishable from "episode in zero
    // clusters". Use a separate existence probe.
    if rows.is_empty() {
        ensure_episode_exists(tenant, &memory_id_for_err, &node_id_full).await?;
        return Ok(GraphExpandResponse {
            nodes: Vec::new(),
            edges: Vec::new(),
        });
    }

    let mut nodes = Vec::with_capacity(rows.len());
    let mut edges = Vec::with_capacity(rows.len());
    for (cluster_id, abstraction, created_at_ms) in rows {
        let target_id = format!("cl:{cluster_id}");
        edges.push(GraphEdge {
            id: edge_id(&node_id_full, "cluster_member", &target_id),
            source: node_id_full.clone(),
            target: target_id,
            kind: "cluster_member",
            predicate: None,
            weight: None,
        });
        nodes.push(graph_node_for_cluster(
            tenant_id,
            &cluster_id,
            abstraction.as_deref(),
            created_at_ms,
        ));
    }
    Ok(GraphExpandResponse { nodes, edges })
}

async fn expand_cluster_member_from_cluster(
    tenant: &TenantHandle,
    tenant_id: &str,
    cluster_id: String,
    node_id_full: String,
    limit: i64,
) -> Result<GraphExpandResponse, ApiError> {
    let cluster_id_for_err = cluster_id.clone();
    let rows: Vec<ExpandedEpisode> = tenant
        .read()
        .interact(move |conn| {
            let exists: i64 = conn.query_row(
                "SELECT COUNT(*) FROM clusters WHERE cluster_id = ?1",
                rusqlite::params![&cluster_id],
                |r| r.get(0),
            )?;
            if exists == 0 {
                return Ok(Vec::new());
            }
            let mut stmt = conn.prepare(
                "SELECT e.memory_id, e.ts_ms, e.content
                   FROM cluster_episodes ce
                   JOIN episodes e ON e.memory_id = ce.memory_id
                  WHERE ce.cluster_id = ?1
                    AND e.status = 'active'
                  ORDER BY e.ts_ms DESC
                  LIMIT ?2",
            )?;
            let mapped = stmt
                .query_map(rusqlite::params![&cluster_id, limit], |r| {
                    Ok(ExpandedEpisode {
                        memory_id: r.get(0)?,
                        ts_ms: r.get(1)?,
                        content: r.get(2)?,
                    })
                })?
                .collect::<rusqlite::Result<Vec<_>>>()?;
            Ok::<_, rusqlite::Error>(mapped)
        })
        .await
        .map_err(ApiError::from)?;

    if rows.is_empty() {
        ensure_cluster_exists(tenant, &cluster_id_for_err, &node_id_full).await?;
        return Ok(GraphExpandResponse {
            nodes: Vec::new(),
            edges: Vec::new(),
        });
    }

    let mut nodes = Vec::with_capacity(rows.len());
    let mut edges = Vec::with_capacity(rows.len());
    for ep in rows {
        let target_id = format!("ep:{}", ep.memory_id);
        edges.push(GraphEdge {
            id: edge_id(&node_id_full, "cluster_member", &target_id),
            source: node_id_full.clone(),
            target: target_id,
            kind: "cluster_member",
            predicate: None,
            weight: None,
        });
        nodes.push(graph_node_for_episode(tenant_id, &ep));
    }
    Ok(GraphExpandResponse { nodes, edges })
}

// ---- document_chunk ----

/// Dispatches a `kind=document_chunk` expansion on the kind of the source node.
///
/// Document sources go to `expand_document_chunk_from_document` (which
/// honours `limit`); chunk sources go to `expand_document_chunk_from_chunk`,
/// which takes no limit. Any other node kind is rejected with a bad-request
/// error that names the offending kind.
async fn expand_document_chunk(
    tenant: &TenantHandle,
    tenant_id: &str,
    node_kind: NodeKind,
    value: &str,
    node_id_full: &str,
    limit: i64,
) -> Result<GraphExpandResponse, ApiError> {
    match node_kind {
        NodeKind::Document => {
            let doc_id = value.to_owned();
            let source_node = node_id_full.to_owned();
            expand_document_chunk_from_document(tenant, tenant_id, doc_id, source_node, limit)
                .await
        }
        NodeKind::Chunk => {
            let chunk_id = value.to_owned();
            let source_node = node_id_full.to_owned();
            expand_document_chunk_from_chunk(tenant, tenant_id, chunk_id, source_node).await
        }
        unsupported => {
            let reason = format!(
                "kind=document_chunk only valid for document or chunk source nodes; got {}",
                unsupported.as_wire_str()
            );
            Err(ApiError::bad_request(reason))
        }
    }
}

async fn expand_document_chunk_from_document(
    tenant: &TenantHandle,
    tenant_id: &str,
    doc_id: String,
    node_id_full: String,
    limit: i64,
) -> Result<GraphExpandResponse, ApiError> {
    let doc_id_for_err = doc_id.clone();
    let rows: Vec<ExpandedChunk> = tenant
        .read()
        .interact(move |conn| {
            let exists: i64 = conn.query_row(
                "SELECT COUNT(*) FROM documents WHERE doc_id = ?1",
                rusqlite::params![&doc_id],
                |r| r.get(0),
            )?;
            if exists == 0 {
                return Ok(Vec::new());
            }
            let mut stmt = conn.prepare(
                "SELECT chunk_id, chunk_index, content
                   FROM document_chunks
                  WHERE doc_id = ?1
                  ORDER BY chunk_index ASC
                  LIMIT ?2",
            )?;
            let mapped = stmt
                .query_map(rusqlite::params![&doc_id, limit], |r| {
                    Ok(ExpandedChunk {
                        chunk_id: r.get(0)?,
                        chunk_index: r.get(1)?,
                        content: r.get(2)?,
                    })
                })?
                .collect::<rusqlite::Result<Vec<_>>>()?;
            Ok::<_, rusqlite::Error>(mapped)
        })
        .await
        .map_err(ApiError::from)?;

    if rows.is_empty() {
        ensure_document_exists(tenant, &doc_id_for_err, &node_id_full).await?;
        return Ok(GraphExpandResponse {
            nodes: Vec::new(),
            edges: Vec::new(),
        });
    }

    let mut nodes = Vec::with_capacity(rows.len());
    let mut edges = Vec::with_capacity(rows.len());
    for c in rows {
        let target_id = format!("chunk:{}", c.chunk_id);
        edges.push(GraphEdge {
            id: edge_id(&node_id_full, "document_chunk", &target_id),
            source: node_id_full.clone(),
            target: target_id,
            kind: "document_chunk",
            predicate: None,
            weight: None,
        });
        nodes.push(graph_node_for_chunk(tenant_id, &c));
    }
    Ok(GraphExpandResponse { nodes, edges })
}

async fn expand_document_chunk_from_chunk(
    tenant: &TenantHandle,
    tenant_id: &str,
    chunk_id: String,
    node_id_full: String,
) -> Result<GraphExpandResponse, ApiError> {
    let chunk_id_for_err = chunk_id.clone();
    let row: Option<ExpandedDocument> = tenant
        .read()
        .interact(move |conn| {
            conn.query_row(
                "SELECT d.doc_id, d.title, d.source, d.ingested_at_ms
                   FROM document_chunks c
                   JOIN documents d ON d.doc_id = c.doc_id
                  WHERE c.chunk_id = ?1",
                rusqlite::params![&chunk_id],
                |r| {
                    Ok(ExpandedDocument {
                        doc_id: r.get(0)?,
                        title: r.get(1)?,
                        source: r.get(2)?,
                        ingested_at_ms: r.get(3)?,
                    })
                },
            )
            .map(Some)
            .or_else(|e| match e {
                rusqlite::Error::QueryReturnedNoRows => Ok(None),
                other => Err(other),
            })
        })
        .await
        .map_err(ApiError::from)?;

    let d = row.ok_or_else(|| {
        ApiError::not_found(format!(
            "node_id {node_id_full:?} (chunk_id {chunk_id_for_err}) not found in current tenant"
        ))
    })?;
    let target_id = format!("doc:{}", d.doc_id);
    let edge = GraphEdge {
        id: edge_id(&node_id_full, "document_chunk", &target_id),
        source: node_id_full.clone(),
        target: target_id,
        kind: "document_chunk",
        predicate: None,
        weight: None,
    };
    let node = graph_node_for_document(tenant_id, &d);
    Ok(GraphExpandResponse {
        nodes: vec![node],
        edges: vec![edge],
    })
}

// ---- triple ----

/// Dispatches a `kind=triple` expansion on the kind of the source node.
///
/// Episode sources go to `expand_triple_from_episode` and entity sources to
/// `expand_triple_from_entity`; `value` and `node_id_full` are handed over as
/// owned strings. Any other node kind is rejected with a bad-request error
/// that names the offending kind.
async fn expand_triple(
    tenant: &TenantHandle,
    tenant_id: &str,
    node_kind: NodeKind,
    value: &str,
    node_id_full: &str,
    limit: i64,
) -> Result<GraphExpandResponse, ApiError> {
    match node_kind {
        NodeKind::Episode => {
            let memory_id = value.to_owned();
            let source_node = node_id_full.to_owned();
            expand_triple_from_episode(tenant, tenant_id, memory_id, source_node, limit).await
        }
        NodeKind::Entity => {
            let entity = value.to_owned();
            let source_node = node_id_full.to_owned();
            expand_triple_from_entity(tenant, tenant_id, entity, source_node, limit).await
        }
        unsupported => {
            let reason = format!(
                "kind=triple only valid for episode or entity source nodes; got {}",
                unsupported.as_wire_str()
            );
            Err(ApiError::bad_request(reason))
        }
    }
}

/// One row read from the `triples` table by `expand_triple_from_episode`
/// (`SELECT subject_id, predicate, object_id, confidence`).
#[derive(Debug)]
struct TripleRow {
    /// Subject entity value; rendered as the `ent:`-prefixed edge source.
    subject_id: String,
    /// Predicate text; carried on the emitted edge's `predicate` field.
    predicate: String,
    /// Object entity value; rendered as the `ent:`-prefixed edge target.
    object_id: String,
    /// Stored confidence; carried on the emitted edge's `weight` field.
    confidence: f32,
}

/// Expands an episode node into the triples extracted from it.
///
/// Resolves `memory_id` to the episode's rowid, then reads up to `limit`
/// triples with status `active` whose `source_episode_id` is that rowid,
/// ordered by `valid_from_ms` descending. Each triple contributes one
/// `triple` edge from its subject entity to its object entity (predicate and
/// confidence attached), and each distinct entity contributes one node.
///
/// When no triples come back, `ensure_episode_exists` decides between an
/// error for a missing episode and an empty response for an episode without
/// active triples.
async fn expand_triple_from_episode(
    tenant: &TenantHandle,
    tenant_id: &str,
    memory_id: String,
    node_id_full: String,
    limit: i64,
) -> Result<GraphExpandResponse, ApiError> {
    // `memory_id` moves into the closure below; this copy feeds the
    // existence probe afterwards.
    let memory_id_for_err = memory_id.clone();
    let rows: Vec<TripleRow> = tenant
        .read()
        .interact(move |conn| {
            // Episode rowid lookup (triples FK is INTEGER rowid, not memory_id).
            let rowid_opt: Option<i64> = conn
                .query_row(
                    "SELECT rowid FROM episodes WHERE memory_id = ?1",
                    rusqlite::params![&memory_id],
                    |r| r.get(0),
                )
                .map(Some)
                .or_else(|e| match e {
                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
                    other => Err(other),
                })?;
            // Unknown episode: report "no rows"; the caller-side probe below
            // turns that into the not-found error.
            let Some(rowid) = rowid_opt else {
                return Ok(Vec::new());
            };
            let mut stmt = conn.prepare(
                "SELECT subject_id, predicate, object_id, confidence
                   FROM triples
                  WHERE source_episode_id = ?1
                    AND status = 'active'
                  ORDER BY valid_from_ms DESC
                  LIMIT ?2",
            )?;
            let mapped = stmt
                .query_map(rusqlite::params![rowid, limit], |r| {
                    Ok(TripleRow {
                        subject_id: r.get(0)?,
                        predicate: r.get(1)?,
                        object_id: r.get(2)?,
                        confidence: r.get(3)?,
                    })
                })?
                .collect::<rusqlite::Result<Vec<_>>>()?;
            Ok::<_, rusqlite::Error>(mapped)
        })
        .await
        .map_err(ApiError::from)?;

    if rows.is_empty() {
        ensure_episode_exists(tenant, &memory_id_for_err, &node_id_full).await?;
        return Ok(GraphExpandResponse {
            nodes: Vec::new(),
            edges: Vec::new(),
        });
    }

    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    // Entities already emitted as nodes, keyed by their raw (unprefixed) value.
    let mut seen_entities: std::collections::HashSet<String> = Default::default();
    for t in rows {
        // Mint both endpoints as entity nodes (once per distinct entity) and
        // emit exactly ONE edge per triple: subject_entity → object_entity,
        // kind=triple, carrying the predicate and the confidence as weight.
        //
        // NOTE(review): an earlier version of this comment also described a
        // source_episode → subject_entity edge. No such edge is emitted here,
        // so the returned subgraph is not connected to `node_id_full` by any
        // edge — confirm whether that is intended.
        // NOTE(review): the edge id is derived from (subject, "triple",
        // object) only; two triples with the same endpoints but different
        // predicates presumably get the same id — verify against `edge_id`.
        let subj_id = format!("ent:{}", t.subject_id);
        let obj_id = format!("ent:{}", t.object_id);
        if seen_entities.insert(t.subject_id.clone()) {
            nodes.push(graph_node_for_entity(tenant_id, &t.subject_id));
        }
        if seen_entities.insert(t.object_id.clone()) {
            nodes.push(graph_node_for_entity(tenant_id, &t.object_id));
        }
        edges.push(GraphEdge {
            id: edge_id(&subj_id, "triple", &obj_id),
            source: subj_id,
            target: obj_id,
            kind: "triple",
            predicate: Some(t.predicate),
            weight: Some(t.confidence),
        });
    }
    Ok(GraphExpandResponse { nodes, edges })
}

/// Expands an entity node into the episodes whose triples mention it.
///
/// Selects up to `limit` distinct episodes with status `active` that are the
/// source of at least one `active` triple referencing `entity_value` as
/// subject or object, newest first. Each episode is attached to the source
/// node by a `triple` edge with no predicate and no weight.
///
/// Entity nodes are synthetic, so there is no existence check: an unknown
/// entity simply yields an empty (successful) response.
async fn expand_triple_from_entity(
    tenant: &TenantHandle,
    tenant_id: &str,
    entity_value: String,
    node_id_full: String,
    limit: i64,
) -> Result<GraphExpandResponse, ApiError> {
    // `entity_value` is only needed as a query parameter, so it is moved
    // straight into the closure — no clone required.
    let rows: Vec<ExpandedEpisode> = tenant
        .read()
        .interact(move |conn| {
            // Find episodes whose triples reference this entity on either
            // side. JOIN against episodes.rowid via triples.source_episode_id.
            let mut stmt = conn.prepare(
                "SELECT DISTINCT e.memory_id, e.ts_ms, e.content
                   FROM triples t
                   JOIN episodes e ON e.rowid = t.source_episode_id
                  WHERE (t.subject_id = ?1 OR t.object_id = ?1)
                    AND t.status = 'active'
                    AND t.source_episode_id IS NOT NULL
                    AND e.status = 'active'
                  ORDER BY e.ts_ms DESC
                  LIMIT ?2",
            )?;
            let mapped = stmt
                .query_map(rusqlite::params![&entity_value, limit], |r| {
                    Ok(ExpandedEpisode {
                        memory_id: r.get(0)?,
                        ts_ms: r.get(1)?,
                        content: r.get(2)?,
                    })
                })?
                .collect::<rusqlite::Result<Vec<_>>>()?;
            Ok::<_, rusqlite::Error>(mapped)
        })
        .await
        .map_err(ApiError::from)?;

    // Empty result on entity expand is a valid 200 — the entity exists
    // only in the wire format; "no edges" is the right answer.
    let mut nodes = Vec::with_capacity(rows.len());
    let mut edges = Vec::with_capacity(rows.len());
    for ep in rows {
        let target_id = format!("ep:{}", ep.memory_id);
        edges.push(GraphEdge {
            id: edge_id(&node_id_full, "triple", &target_id),
            source: node_id_full.clone(),
            target: target_id,
            kind: "triple",
            predicate: None,
            weight: None,
        });
        nodes.push(graph_node_for_episode(tenant_id, &ep));
    }
    Ok(GraphExpandResponse { nodes, edges })
}

// ---- semantic ----

/// Expands an episode node into its nearest semantic neighbours.
///
/// Only episode sources are accepted; any other kind gets a bad-request
/// error. The source episode's content is loaded (it must have status
/// `active`, otherwise a not-found error is returned) and fed to
/// `solo_query::recall::run_recall_inner` as the query text. Each returned
/// hit other than the source episode itself becomes one episode node plus one
/// `semantic` edge weighted by `max(1 - cos_distance, 0)`, up to `limit`
/// neighbours.
async fn expand_semantic(
    tenant: &TenantHandle,
    tenant_id: &str,
    node_kind: NodeKind,
    value: &str,
    node_id_full: &str,
    limit: i64,
) -> Result<GraphExpandResponse, ApiError> {
    if node_kind != NodeKind::Episode {
        return Err(ApiError::bad_request(format!(
            "kind=semantic only valid for episode source nodes; got {}",
            node_kind.as_wire_str()
        )));
    }
    let memory_id = value.to_string();
    // Separate copy for the closure; `memory_id` itself is still needed for
    // the error message and the self-hit filter below.
    let memory_id_q = memory_id.clone();
    // Fetch the source episode's content so we can re-embed it and call
    // the existing HNSW pipeline. Cheaper-than-extra-machinery: reuses
    // the well-tested `run_recall_inner` path that already filters
    // forgotten rows + decodes hnsw ids.
    let content: Option<String> = tenant
        .read()
        .interact(move |conn| {
            conn.query_row(
                "SELECT content FROM episodes WHERE memory_id = ?1 AND status = 'active'",
                rusqlite::params![&memory_id_q],
                |r| r.get::<_, String>(0),
            )
            .map(Some)
            // "No rows" means the episode is missing or not active — map it
            // to `None` instead of a database error.
            .or_else(|e| match e {
                rusqlite::Error::QueryReturnedNoRows => Ok(None),
                other => Err(other),
            })
        })
        .await
        .map_err(ApiError::from)?;

    let content = content.ok_or_else(|| {
        ApiError::not_found(format!(
            "node_id {node_id_full:?} (memory_id {memory_id}) not found in current tenant"
        ))
    })?;

    // Pull one extra hit so we can drop self without losing user-requested
    // count. limit is already ≤ MAX_LIMIT; +1 stays within reason.
    // NOTE(review): the `.min(100)` cap means that for `limit >= 100` the
    // extra hit is NOT requested, so dropping self can leave the response one
    // short. Confirm against GRAPH_EXPAND_MAX_LIMIT and any cap inside
    // `run_recall_inner` (neither is visible from this function).
    let widened = (limit as usize).saturating_add(1).min(100);
    let result = solo_query::recall::run_recall_inner(
        tenant.embedder(),
        tenant.hnsw(),
        tenant.read(),
        &content,
        widened,
    )
    .await
    .map_err(ApiError::from)?;

    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    for hit in result.hits.into_iter() {
        if hit.memory_id == memory_id {
            // Skip self.
            continue;
        }
        // Stop once the caller's requested neighbour count is reached.
        if nodes.len() as i64 >= limit {
            break;
        }
        // The HNSW `cos_distance` is a distance (smaller = more similar).
        // Convert to a weight in [0, 1] (larger = more similar) for the
        // wire format: weight = (1 - distance).max(0).
        let weight = (1.0 - hit.cos_distance).max(0.0);
        let target_id = format!("ep:{}", hit.memory_id);
        edges.push(GraphEdge {
            id: edge_id(node_id_full, "semantic", &target_id),
            source: node_id_full.to_string(),
            target: target_id,
            kind: "semantic",
            predicate: None,
            weight: Some(weight),
        });
        // The node is built inline from the recall hit; `ts_ms` is left as
        // `None` here.
        nodes.push(GraphNode {
            id: format!("ep:{}", hit.memory_id),
            kind: NodeKind::Episode.as_wire_str(),
            label: episode_label(&hit.content),
            ts_ms: None,
            tenant_id: tenant_id.to_string(),
            preview: Some(truncate_preview(&hit.content, GRAPH_PREVIEW_CHARS)),
        });
    }
    Ok(GraphExpandResponse { nodes, edges })
}

// ---- existence checks ----

/// Succeeds when `memory_id` has at least one row in this tenant's
/// `episodes` table; otherwise returns a not-found error that quotes
/// `node_id_full`. Row status is not considered.
async fn ensure_episode_exists(
    tenant: &TenantHandle,
    memory_id: &str,
    node_id_full: &str,
) -> Result<(), ApiError> {
    // The closure needs an owned copy of the id.
    let wanted = memory_id.to_owned();
    let row_count: i64 = tenant
        .read()
        .interact(move |conn| {
            conn.query_row(
                "SELECT COUNT(*) FROM episodes WHERE memory_id = ?1",
                rusqlite::params![&wanted],
                |row| row.get(0),
            )
        })
        .await
        .map_err(ApiError::from)?;
    match row_count {
        0 => Err(ApiError::not_found(format!(
            "node_id {node_id_full:?} not found in current tenant"
        ))),
        _ => Ok(()),
    }
}

/// Succeeds when `cluster_id` has at least one row in this tenant's
/// `clusters` table; otherwise returns a not-found error that quotes
/// `node_id_full`.
async fn ensure_cluster_exists(
    tenant: &TenantHandle,
    cluster_id: &str,
    node_id_full: &str,
) -> Result<(), ApiError> {
    // The closure needs an owned copy of the id.
    let wanted = cluster_id.to_owned();
    let row_count: i64 = tenant
        .read()
        .interact(move |conn| {
            conn.query_row(
                "SELECT COUNT(*) FROM clusters WHERE cluster_id = ?1",
                rusqlite::params![&wanted],
                |row| row.get(0),
            )
        })
        .await
        .map_err(ApiError::from)?;
    match row_count {
        0 => Err(ApiError::not_found(format!(
            "node_id {node_id_full:?} not found in current tenant"
        ))),
        _ => Ok(()),
    }
}

/// Succeeds when `doc_id` has at least one row in this tenant's `documents`
/// table; otherwise returns a not-found error that quotes `node_id_full`.
async fn ensure_document_exists(
    tenant: &TenantHandle,
    doc_id: &str,
    node_id_full: &str,
) -> Result<(), ApiError> {
    // The closure needs an owned copy of the id.
    let wanted = doc_id.to_owned();
    let row_count: i64 = tenant
        .read()
        .interact(move |conn| {
            conn.query_row(
                "SELECT COUNT(*) FROM documents WHERE doc_id = ?1",
                rusqlite::params![&wanted],
                |row| row.get(0),
            )
        })
        .await
        .map_err(ApiError::from)?;
    match row_count {
        0 => Err(ApiError::not_found(format!(
            "node_id {node_id_full:?} not found in current tenant"
        ))),
        _ => Ok(()),
    }
}

// ---------------------------------------------------------------------------
// Graph nodes + edges — paginated catalog reads (v0.10.0)
//
// `GET /v1/graph/nodes` and `GET /v1/graph/edges` are the bundle that
// powers solo-web's initial graph render. Both are read-only, both
// share the same tenant / auth / cursor scaffolding, both inherit the
// node-id prefix convention from `/v1/graph/expand` (ep:/doc:/chunk:/cl:/ent:).
//
// See `docs/dev-log/0114-graph-nodes-edges-impl.md` for the design
// notes (cursor format, entity scan strategy, semantic-edge rejection
// rationale, UNION pagination shape).
// ---------------------------------------------------------------------------

// NOTE(review): the handlers that consume the four limit constants below are
// outside this section; the descriptions are inferred from the names and from
// the `limit` query fields — confirm against the handlers.

/// Presumably the page size for `/v1/graph/nodes` when `limit` is omitted.
const GRAPH_NODES_DEFAULT_LIMIT: u32 = 100;
/// Presumably the upper bound for `limit` on `/v1/graph/nodes`.
const GRAPH_NODES_MAX_LIMIT: u32 = 1000;
/// Presumably the page size for `/v1/graph/edges` when `limit` is omitted.
const GRAPH_EDGES_DEFAULT_LIMIT: u32 = 200;
/// Presumably the upper bound for `limit` on `/v1/graph/edges`.
const GRAPH_EDGES_MAX_LIMIT: u32 = 2000;
/// Cap on the entity scan; when it is hit, `ENTITY_CAP_HEADER` is set on the
/// response.
const GRAPH_ENTITY_CAP: usize = 200;

/// Header set when the entity scan hit `GRAPH_ENTITY_CAP` and lower-
/// frequency entities were dropped from the response. Clients can show
/// "entities truncated" UX without parsing the body.
const ENTITY_CAP_HEADER: &str = "x-solo-entity-cap-reached";

/// Query-string parameters for `GET /v1/graph/nodes`. Every field is
/// optional; a missing parameter deserializes to `None`.
#[derive(Debug, Deserialize)]
struct GraphNodesQuery {
    /// Comma-separated kinds. Empty/missing = all five kinds. Repeated
    /// `?kind=` query params are NOT supported by axum's `Query<T>`
    /// extractor for `Option<String>` (it picks one) — comma-separated
    /// is documented + simpler. Values: episode|document|chunk|cluster|entity.
    #[serde(default)]
    kind: Option<String>,
    /// Lower time bound in milliseconds — presumably inclusive, matching the
    /// `ts_ms >= ?` filter in the per-kind fetchers (TODO confirm in handler).
    #[serde(default)]
    since_ms: Option<i64>,
    /// Upper time bound in milliseconds — presumably inclusive, matching the
    /// `ts_ms <= ?` filter in the per-kind fetchers (TODO confirm in handler).
    #[serde(default)]
    until_ms: Option<i64>,
    /// Requested page size; defaulting/clamping happens in the handler,
    /// which is not part of this struct.
    #[serde(default)]
    limit: Option<u32>,
    /// Opaque pagination token — presumably an encoded `NodesCursor` from a
    /// previous response's `next_cursor` (TODO confirm in handler).
    #[serde(default)]
    cursor: Option<String>,
}

/// Query-string parameters for `GET /v1/graph/edges`. Every field is
/// optional; a missing parameter deserializes to `None`.
#[derive(Debug, Deserialize)]
struct GraphEdgesQuery {
    /// Optional node id — presumably restricts the result to edges touching
    /// that node (TODO confirm in handler).
    #[serde(default)]
    node_id: Option<String>,
    /// Comma-separated. Default = all kinds EXCEPT semantic.
    /// Values: triple|document_chunk|cluster_member|semantic.
    /// Note that `semantic` is recognised but rejected with a bad-request
    /// error by `parse_edge_kind_filter`. Appears as `type` in the query
    /// string (`r#type` is only the Rust spelling of the keyword).
    #[serde(default)]
    r#type: Option<String>,
    /// Requested page size; defaulting/clamping happens in the handler,
    /// which is not part of this struct.
    #[serde(default)]
    limit: Option<u32>,
    /// Opaque pagination token — presumably an encoded `EdgesCursor` from a
    /// previous response's `next_cursor` (TODO confirm in handler).
    #[serde(default)]
    cursor: Option<String>,
}

/// JSON body for the graph-nodes listing: one page of nodes plus an optional
/// continuation token.
#[derive(Debug, Serialize)]
struct GraphNodesResponse {
    /// The nodes in this page.
    nodes: Vec<GraphNode>,
    /// Cursor for the following page; the key is omitted from the JSON
    /// entirely when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    next_cursor: Option<String>,
}

/// JSON body for the graph-edges listing: one page of edges plus an optional
/// continuation token.
#[derive(Debug, Serialize)]
struct GraphEdgesResponse {
    /// The edges in this page.
    edges: Vec<GraphEdge>,
    /// Cursor for the following page; the key is omitted from the JSON
    /// entirely when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    next_cursor: Option<String>,
}

/// Parses the comma-separated `kind` filter of the nodes endpoint.
///
/// * Absent or blank input selects all five kinds (episode, document, chunk,
///   cluster, entity — in that order).
/// * Otherwise each token is trimmed, blank tokens are skipped, duplicates are
///   dropped, and first-seen order is kept.
///
/// # Errors
///
/// Bad request when a token is not a known kind, or when the input contains
/// only separators/whitespace so that no kind remains.
fn parse_node_kind_filter(raw: Option<&str>) -> Result<Vec<NodeKind>, ApiError> {
    let spec = raw.map(str::trim).unwrap_or_default();
    if spec.is_empty() {
        return Ok(vec![
            NodeKind::Episode,
            NodeKind::Document,
            NodeKind::Chunk,
            NodeKind::Cluster,
            NodeKind::Entity,
        ]);
    }

    let mut selected = Vec::new();
    let names = spec
        .split(',')
        .map(str::trim)
        .filter(|name| !name.is_empty());
    for name in names {
        let parsed = match name {
            "episode" => NodeKind::Episode,
            "document" => NodeKind::Document,
            "chunk" => NodeKind::Chunk,
            "cluster" => NodeKind::Cluster,
            "entity" => NodeKind::Entity,
            other => {
                return Err(ApiError::bad_request(format!(
                    "unknown node kind {other:?}; expected one of episode/document/chunk/cluster/entity"
                )));
            }
        };
        // Keep the first occurrence only.
        if !selected.contains(&parsed) {
            selected.push(parsed);
        }
    }

    if selected.is_empty() {
        Err(ApiError::bad_request(
            "kind filter is empty after parsing; either omit or list at least one kind",
        ))
    } else {
        Ok(selected)
    }
}

/// Edge-kind discriminator on `/v1/graph/edges`.
///
/// The explicit discriminants are the pagination sort indices returned by
/// [`EdgeKind::order_idx`]; keep them stable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum EdgeKind {
    Triple = 0,
    DocumentChunk = 1,
    ClusterMember = 2,
}

impl EdgeKind {
    /// Sort-stable kind ordering for pagination. Lower runs first:
    /// triple (0), document_chunk (1), cluster_member (2).
    fn order_idx(self) -> u8 {
        // Lossless: every discriminant above fits in a `u8`.
        self as u8
    }
}

/// Parses the comma-separated `type` filter of the edges endpoint.
///
/// * Absent or blank input selects the three concrete kinds (triple,
///   document_chunk, cluster_member — in that order).
/// * Otherwise each token is trimmed, blank tokens are skipped, duplicates are
///   dropped, and first-seen order is kept.
///
/// # Errors
///
/// Bad request when a token is `semantic` (with a message pointing at the
/// endpoint that serves semantic edges), when a token is not a known type, or
/// when the input contains only separators/whitespace.
fn parse_edge_kind_filter(raw: Option<&str>) -> Result<Vec<EdgeKind>, ApiError> {
    let spec = raw.map(str::trim).unwrap_or_default();
    if spec.is_empty() {
        // Default = all three concrete kinds; semantic is opt-in via
        // /v1/graph/neighbors/:id (per scoping doc §3 Decision B).
        return Ok(vec![
            EdgeKind::Triple,
            EdgeKind::DocumentChunk,
            EdgeKind::ClusterMember,
        ]);
    }

    let mut selected = Vec::new();
    let names = spec
        .split(',')
        .map(str::trim)
        .filter(|name| !name.is_empty());
    for name in names {
        let parsed = match name {
            "triple" => EdgeKind::Triple,
            "document_chunk" => EdgeKind::DocumentChunk,
            "cluster_member" => EdgeKind::ClusterMember,
            // Semantic edges are computed per request via HNSW rather than
            // stored, so they cannot be listed from this endpoint.
            "semantic" => {
                return Err(ApiError::bad_request(
                    "semantic edges are available via /v1/graph/neighbors/:id?kind=semantic, not /v1/graph/edges (semantic edges aren't precomputed; they're query-time HNSW lookups)",
                ));
            }
            other => {
                return Err(ApiError::bad_request(format!(
                    "unknown edge type {other:?}; expected one of triple/document_chunk/cluster_member"
                )));
            }
        };
        // Keep the first occurrence only.
        if !selected.contains(&parsed) {
            selected.push(parsed);
        }
    }

    if selected.is_empty() {
        Err(ApiError::bad_request(
            "type filter is empty after parsing; either omit or list at least one type",
        ))
    } else {
        Ok(selected)
    }
}

/// Opaque cursor for `/v1/graph/nodes`. Encodes the last item's
/// `(ts_ms, id)`. Under the sort `ts_ms DESC, id ASC`, a row belongs to a
/// following page when it sorts strictly after the cursor, i.e. when
/// `ts_ms < cursor.ts_ms`, or `ts_ms == cursor.ts_ms` and `id > cursor.id`
/// (this is the test implemented by `node_passes_cursor`; it is not a plain
/// tuple `<` comparison because the two columns sort in opposite directions).
#[derive(Debug, Serialize, Deserialize)]
struct NodesCursor {
    /// Timestamp (milliseconds) of the last item on the previous page.
    ts_ms: i64,
    /// Id of the last item on the previous page; used as the tie-break for
    /// rows sharing `ts_ms`. Presumably the prefixed node id (`ep:…`,
    /// `doc:…`, …) — TODO confirm where the cursor is built.
    id: String,
}

/// Opaque cursor for `/v1/graph/edges`. Encodes the last item's
/// `(kind_idx, sub_id)` so the next page resumes at `> cursor` under
/// sort `(kind_idx ASC, sub_id ASC)`. `sub_id` is the per-kind stable
/// row id (triple_id for triples, chunk_id for document_chunk, the
/// composite `cluster_id||memory_id` string for cluster_member).
#[derive(Debug, Serialize, Deserialize)]
struct EdgesCursor {
    /// Kind index of the last item on the previous page — presumably the
    /// value of `EdgeKind::order_idx` (TODO confirm where the cursor is built).
    kind_idx: u8,
    /// Per-kind row id of the last item on the previous page, as described
    /// above.
    sub_id: String,
}

/// Serializes `value` to JSON and wraps it in unpadded URL-safe base64,
/// producing an opaque cursor string.
///
/// # Errors
///
/// Internal error if JSON serialization fails.
fn encode_cursor<T>(value: &T) -> Result<String, ApiError>
where
    T: Serialize,
{
    use base64::Engine;
    match serde_json::to_vec(value) {
        Ok(payload) => Ok(base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(payload)),
        Err(e) => Err(ApiError::internal(format!("cursor serialize: {e}"))),
    }
}

/// Inverse of `encode_cursor`: decodes unpadded URL-safe base64 and parses the
/// bytes as JSON into `T`.
///
/// # Errors
///
/// Bad request if `raw` is not valid base64 or the decoded payload is not
/// valid JSON for `T`.
fn decode_cursor<T>(raw: &str) -> Result<T, ApiError>
where
    T: for<'de> Deserialize<'de>,
{
    use base64::Engine;
    let payload = match base64::engine::general_purpose::URL_SAFE_NO_PAD.decode(raw.as_bytes()) {
        Ok(bytes) => bytes,
        Err(e) => return Err(ApiError::bad_request(format!("cursor: bad base64: {e}"))),
    };
    match serde_json::from_slice::<T>(&payload) {
        Ok(cursor) => Ok(cursor),
        Err(e) => Err(ApiError::bad_request(format!(
            "cursor: bad JSON payload: {e}"
        ))),
    }
}

/// Internal staging row for the nodes endpoint. Carries the GraphNode
/// plus the sort key so we can merge all kinds before applying the
/// pagination cut.
#[derive(Debug)]
struct StagingNode {
    /// The wire-format node that ends up in the response.
    node: GraphNode,
    /// Primary sort key (descending) — see `cmp_node_sort_keys`.
    sort_ts_ms: i64,
    /// Tie-break sort key (ascending) — see `cmp_node_sort_keys`.
    sort_id: String,
}

/// Compares two `(ts_ms, id)` sort keys under the canonical node ordering:
/// larger `ts_ms` first (newest first), then ascending `id` as a
/// deterministic tie-break.
fn cmp_node_sort_keys(a: (i64, &str), b: (i64, &str)) -> std::cmp::Ordering {
    let (a_ts, a_id) = a;
    let (b_ts, b_id) = b;
    // Operands are swapped for the timestamp to get descending order.
    b_ts.cmp(&a_ts).then_with(|| a_id.cmp(b_id))
}

/// Returns `true` when `(ts_ms, id)` sorts strictly AFTER `cursor` under the
/// canonical `ts_ms DESC, id ASC` ordering, i.e. the row is eligible for a
/// page that follows the cursor. A row equal to the cursor is rejected.
fn node_passes_cursor(ts_ms: i64, id: &str, cursor: &NodesCursor) -> bool {
    let candidate = (ts_ms, id);
    let boundary = (cursor.ts_ms, cursor.id.as_str());
    cmp_node_sort_keys(candidate, boundary).is_gt()
}

// --- Per-kind row fetchers (each runs a bounded query, applies the time
//     filter, returns rows already sorted `ts_ms DESC, id ASC`).

/// One episode row as read by `fetch_episodes_for_nodes`
/// (`SELECT memory_id, ts_ms, content FROM episodes`).
#[derive(Debug)]
struct NodeRowEp {
    /// The episode's `memory_id` column (unprefixed).
    memory_id: String,
    /// The episode's `ts_ms` column; the column the fetcher filters and
    /// sorts on.
    ts_ms: i64,
    /// The episode's `content` column.
    content: String,
}

/// Fetch active episodes for the nodes endpoint, sorted
/// `ts_ms DESC, memory_id ASC` and capped at `limit` rows.
///
/// * `since_ms` / `until_ms` — optional inclusive bounds on `ts_ms`.
/// * `cursor` — position of the last node on the previous page. Only rows
///   that sort strictly after it under `(ts_ms DESC, prefixed id ASC)`
///   are returned.
/// * `limit` — SQL `LIMIT`; the caller over-fetches so it can detect a
///   following page.
///
/// The cursor predicate is a full keyset comparison done in SQL. Filtering
/// on `ts_ms <= cursor.ts_ms` alone re-reads the same leading rows on every
/// page once more than `limit` episodes share the cursor's timestamp, and
/// the rows after them are never reached.
fn fetch_episodes_for_nodes(
    conn: &rusqlite::Connection,
    since_ms: Option<i64>,
    until_ms: Option<i64>,
    cursor: Option<&NodesCursor>,
    limit: i64,
) -> rusqlite::Result<Vec<NodeRowEp>> {
    let mut sql = String::from(
        "SELECT memory_id, ts_ms, content
           FROM episodes
          WHERE status = 'active'",
    );
    let mut params: Vec<rusqlite::types::Value> = Vec::new();
    if let Some(s) = since_ms {
        sql.push_str(" AND ts_ms >= ?");
        params.push(s.into());
    }
    if let Some(u) = until_ms {
        sql.push_str(" AND ts_ms <= ?");
        params.push(u.into());
    }
    // Keyset cursor. The first term is a plain range bound on ts_ms; the
    // second applies the tie-break on the prefixed id. The cursor id is
    // the prefixed node id and may belong to another kind, so the
    // comparison is made against `'ep:' || memory_id` — the same string
    // the handler uses as this row's sort id. The expression has no
    // column collation, so SQLite compares it with BINARY (byte order),
    // matching Rust's `str` ordering for UTF-8 text.
    if let Some(cur) = cursor {
        sql.push_str(" AND ts_ms <= ?");
        sql.push_str(" AND (ts_ms < ? OR ('ep:' || memory_id) > ?)");
        params.push(cur.ts_ms.into());
        params.push(cur.ts_ms.into());
        params.push(cur.id.clone().into());
    }
    sql.push_str(" ORDER BY ts_ms DESC, memory_id ASC LIMIT ?");
    params.push(limit.into());
    let mut stmt = conn.prepare(&sql)?;
    let rows: Vec<NodeRowEp> = stmt
        .query_map(rusqlite::params_from_iter(params), |r| {
            Ok(NodeRowEp {
                memory_id: r.get(0)?,
                ts_ms: r.get(1)?,
                content: r.get(2)?,
            })
        })?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(rows)
}

/// One row of the document query behind the nodes endpoint
/// (`fetch_documents_for_nodes`).
#[derive(Debug)]
struct NodeRowDoc {
    /// `documents.doc_id`.
    doc_id: String,
    /// `documents.title`; nullable.
    title: Option<String>,
    /// `documents.source`; nullable.
    source: Option<String>,
    /// `documents.ingested_at_ms`; doubles as the sort timestamp.
    ingested_at_ms: i64,
}

/// Fetch active documents for the nodes endpoint, sorted
/// `ingested_at_ms DESC, doc_id ASC` and capped at `limit` rows.
///
/// * `since_ms` / `until_ms` — optional inclusive bounds on
///   `ingested_at_ms`.
/// * `cursor` — position of the last node on the previous page. Only rows
///   that sort strictly after it under `(ts DESC, prefixed id ASC)` are
///   returned.
/// * `limit` — SQL `LIMIT`; the caller over-fetches so it can detect a
///   following page.
///
/// The cursor predicate is a full keyset comparison done in SQL. A bare
/// `ingested_at_ms <= cursor.ts_ms` filter re-reads the same leading rows
/// on every page once more than `limit` documents share the cursor's
/// timestamp, so the rows after them are never reached.
fn fetch_documents_for_nodes(
    conn: &rusqlite::Connection,
    since_ms: Option<i64>,
    until_ms: Option<i64>,
    cursor: Option<&NodesCursor>,
    limit: i64,
) -> rusqlite::Result<Vec<NodeRowDoc>> {
    let mut sql = String::from(
        "SELECT doc_id, title, source, ingested_at_ms
           FROM documents
          WHERE status = 'active'",
    );
    let mut params: Vec<rusqlite::types::Value> = Vec::new();
    if let Some(s) = since_ms {
        sql.push_str(" AND ingested_at_ms >= ?");
        params.push(s.into());
    }
    if let Some(u) = until_ms {
        sql.push_str(" AND ingested_at_ms <= ?");
        params.push(u.into());
    }
    // Keyset cursor: range bound on the timestamp plus a tie-break on the
    // prefixed id. `'doc:' || doc_id` is the same string the handler uses
    // as this row's sort id; the expression compares with BINARY
    // collation, matching Rust's byte-wise `str` ordering for UTF-8 text.
    if let Some(cur) = cursor {
        sql.push_str(" AND ingested_at_ms <= ?");
        sql.push_str(" AND (ingested_at_ms < ? OR ('doc:' || doc_id) > ?)");
        params.push(cur.ts_ms.into());
        params.push(cur.ts_ms.into());
        params.push(cur.id.clone().into());
    }
    sql.push_str(" ORDER BY ingested_at_ms DESC, doc_id ASC LIMIT ?");
    params.push(limit.into());
    let mut stmt = conn.prepare(&sql)?;
    let rows: Vec<NodeRowDoc> = stmt
        .query_map(rusqlite::params_from_iter(params), |r| {
            Ok(NodeRowDoc {
                doc_id: r.get(0)?,
                title: r.get(1)?,
                source: r.get(2)?,
                ingested_at_ms: r.get(3)?,
            })
        })?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(rows)
}

/// One row of the chunk query behind the nodes endpoint
/// (`fetch_chunks_for_nodes`).
#[derive(Debug)]
struct NodeRowChunk {
    /// `document_chunks.chunk_id`.
    chunk_id: String,
    /// `document_chunks.chunk_index`.
    chunk_index: i64,
    /// `document_chunks.content`.
    content: String,
    /// `document_chunks.created_at_ms`; doubles as the sort timestamp.
    created_at_ms: i64,
}

/// Fetch chunks of active documents for the nodes endpoint, sorted
/// `created_at_ms DESC, chunk_id ASC` and capped at `limit` rows.
///
/// * `since_ms` / `until_ms` — optional inclusive bounds on
///   `document_chunks.created_at_ms`.
/// * `cursor` — position of the last node on the previous page. Only rows
///   that sort strictly after it under `(ts DESC, prefixed id ASC)` are
///   returned.
/// * `limit` — SQL `LIMIT`; the caller over-fetches so it can detect a
///   following page.
///
/// The cursor predicate is a full keyset comparison done in SQL. A bare
/// `created_at_ms <= cursor.ts_ms` filter re-reads the same leading rows
/// on every page once more than `limit` chunks share the cursor's
/// timestamp, so the rows after them are never reached.
fn fetch_chunks_for_nodes(
    conn: &rusqlite::Connection,
    since_ms: Option<i64>,
    until_ms: Option<i64>,
    cursor: Option<&NodesCursor>,
    limit: i64,
) -> rusqlite::Result<Vec<NodeRowChunk>> {
    // Filter by `document_chunks.created_at_ms`; chunks of forgotten
    // documents are filtered out by the join on `documents.status`.
    let mut sql = String::from(
        "SELECT c.chunk_id, c.chunk_index, c.content, c.created_at_ms
           FROM document_chunks c
           JOIN documents d ON d.doc_id = c.doc_id
          WHERE d.status = 'active'",
    );
    let mut params: Vec<rusqlite::types::Value> = Vec::new();
    if let Some(s) = since_ms {
        sql.push_str(" AND c.created_at_ms >= ?");
        params.push(s.into());
    }
    if let Some(u) = until_ms {
        sql.push_str(" AND c.created_at_ms <= ?");
        params.push(u.into());
    }
    // Keyset cursor: range bound on the timestamp plus a tie-break on the
    // prefixed id. `'chunk:' || c.chunk_id` is the same string the handler
    // uses as this row's sort id; the expression compares with BINARY
    // collation, matching Rust's byte-wise `str` ordering for UTF-8 text.
    if let Some(cur) = cursor {
        sql.push_str(" AND c.created_at_ms <= ?");
        sql.push_str(" AND (c.created_at_ms < ? OR ('chunk:' || c.chunk_id) > ?)");
        params.push(cur.ts_ms.into());
        params.push(cur.ts_ms.into());
        params.push(cur.id.clone().into());
    }
    sql.push_str(" ORDER BY c.created_at_ms DESC, c.chunk_id ASC LIMIT ?");
    params.push(limit.into());
    let mut stmt = conn.prepare(&sql)?;
    let rows: Vec<NodeRowChunk> = stmt
        .query_map(rusqlite::params_from_iter(params), |r| {
            Ok(NodeRowChunk {
                chunk_id: r.get(0)?,
                chunk_index: r.get(1)?,
                content: r.get(2)?,
                created_at_ms: r.get(3)?,
            })
        })?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(rows)
}

/// One row of the cluster query behind the nodes endpoint
/// (`fetch_clusters_for_nodes`).
#[derive(Debug)]
struct NodeRowCluster {
    /// `clusters.cluster_id`.
    cluster_id: String,
    /// `semantic_abstractions.content` from the LEFT JOIN; `None` when the
    /// cluster has no abstraction row.
    abstraction: Option<String>,
    /// `clusters.created_at_ms`; doubles as the sort timestamp.
    created_at_ms: i64,
}

/// Fetch clusters (with their optional abstraction text) for the nodes
/// endpoint, sorted `created_at_ms DESC, cluster_id ASC` and capped at
/// `limit` rows.
///
/// * `since_ms` / `until_ms` — optional inclusive bounds on
///   `clusters.created_at_ms`.
/// * `cursor` — position of the last node on the previous page. Only rows
///   that sort strictly after it under `(ts DESC, prefixed id ASC)` are
///   returned.
/// * `limit` — SQL `LIMIT`; the caller over-fetches so it can detect a
///   following page.
///
/// The cursor predicate is a full keyset comparison done in SQL. A bare
/// `created_at_ms <= cursor.ts_ms` filter re-reads the same leading rows
/// on every page once more than `limit` clusters share the cursor's
/// timestamp, so the rows after them are never reached.
fn fetch_clusters_for_nodes(
    conn: &rusqlite::Connection,
    since_ms: Option<i64>,
    until_ms: Option<i64>,
    cursor: Option<&NodesCursor>,
    limit: i64,
) -> rusqlite::Result<Vec<NodeRowCluster>> {
    // clusters has no `status` column; LEFT JOIN abstractions for the
    // optional label.
    let mut sql = String::from(
        "SELECT c.cluster_id, sa.content, c.created_at_ms
           FROM clusters c
           LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
          WHERE 1=1",
    );
    let mut params: Vec<rusqlite::types::Value> = Vec::new();
    if let Some(s) = since_ms {
        sql.push_str(" AND c.created_at_ms >= ?");
        params.push(s.into());
    }
    if let Some(u) = until_ms {
        sql.push_str(" AND c.created_at_ms <= ?");
        params.push(u.into());
    }
    // Keyset cursor: range bound on the timestamp plus a tie-break on the
    // prefixed id. `'cl:' || c.cluster_id` is the same string the handler
    // uses as this row's sort id; the expression compares with BINARY
    // collation, matching Rust's byte-wise `str` ordering for UTF-8 text.
    if let Some(cur) = cursor {
        sql.push_str(" AND c.created_at_ms <= ?");
        sql.push_str(" AND (c.created_at_ms < ? OR ('cl:' || c.cluster_id) > ?)");
        params.push(cur.ts_ms.into());
        params.push(cur.ts_ms.into());
        params.push(cur.id.clone().into());
    }
    sql.push_str(" ORDER BY c.created_at_ms DESC, c.cluster_id ASC LIMIT ?");
    params.push(limit.into());
    let mut stmt = conn.prepare(&sql)?;
    let rows: Vec<NodeRowCluster> = stmt
        .query_map(rusqlite::params_from_iter(params), |r| {
            Ok(NodeRowCluster {
                cluster_id: r.get(0)?,
                abstraction: r.get(1)?,
                created_at_ms: r.get(2)?,
            })
        })?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    Ok(rows)
}

/// One aggregated entity row produced by `fetch_entities_for_nodes`.
#[derive(Debug)]
struct NodeRowEntity {
    /// Entity value: a `subject_id` or `object_id` taken from `triples`.
    value: String,
    /// `COUNT(*)` of subject/object references to this value among the
    /// rows that passed the time filter.
    ref_count: i64,
    /// `MIN(valid_from_ms)` over those same references.
    first_seen_ms: i64,
}

/// Build synthetic entity rows by aggregating the subject and object
/// columns of active triples.
///
/// Every subject/object reference is one input row; rows are grouped by
/// value, yielding a reference count and the earliest `valid_from_ms`.
/// `since_ms` / `until_ms` bound `valid_from_ms` before aggregation. When
/// a cursor is given, groups whose earliest timestamp is newer than the
/// cursor's `ts_ms` are dropped.
///
/// The result is ordered `ref_count DESC, value ASC` and holds at most
/// `GRAPH_ENTITY_CAP` rows. The returned flag is `true` when more groups
/// than the cap were available.
///
/// **Cost**: the aggregation visits every active triple on each request.
fn fetch_entities_for_nodes(
    conn: &rusqlite::Connection,
    since_ms: Option<i64>,
    until_ms: Option<i64>,
    cursor: Option<&NodesCursor>,
) -> rusqlite::Result<(Vec<NodeRowEntity>, bool)> {
    let mut params: Vec<rusqlite::types::Value> = Vec::new();
    // Subject and object references are stacked with UNION ALL so one
    // GROUP BY can count both roles together.
    let mut sql = String::from(
        "WITH all_refs AS (
            SELECT subject_id AS value, valid_from_ms AS ts_ms FROM triples WHERE status = 'active'
            UNION ALL
            SELECT object_id  AS value, valid_from_ms AS ts_ms FROM triples WHERE status = 'active'
         )
         SELECT value, COUNT(*) AS ref_count, MIN(ts_ms) AS first_seen_ms
           FROM all_refs
          WHERE 1=1",
    );
    if let Some(lower) = since_ms {
        sql.push_str(" AND ts_ms >= ?");
        params.push(lower.into());
    }
    if let Some(upper) = until_ms {
        sql.push_str(" AND ts_ms <= ?");
        params.push(upper.into());
    }
    sql.push_str(" GROUP BY value");
    // The cursor bound applies to the aggregate, so it goes in HAVING.
    if let Some(cur) = cursor {
        sql.push_str(" HAVING MIN(ts_ms) <= ?");
        params.push(cur.ts_ms.into());
    }
    // Ask for one row beyond the cap; its presence signals truncation.
    sql.push_str(" ORDER BY ref_count DESC, value ASC LIMIT ?");
    params.push((GRAPH_ENTITY_CAP as i64 + 1).into());

    let mut stmt = conn.prepare(&sql)?;
    let mut entities: Vec<NodeRowEntity> = stmt
        .query_map(rusqlite::params_from_iter(params), |row| {
            Ok(NodeRowEntity {
                value: row.get(0)?,
                ref_count: row.get(1)?,
                first_seen_ms: row.get(2)?,
            })
        })?
        .collect::<rusqlite::Result<Vec<_>>>()?;

    let cap_reached = entities.len() > GRAPH_ENTITY_CAP;
    // `truncate` is a no-op when the vector is already within the cap.
    entities.truncate(GRAPH_ENTITY_CAP);
    Ok((entities, cap_reached))
}

/// `GET /v1/graph/nodes`. Paginated node catalog across the tenant.
/// See module-level comments for the contract.
async fn graph_nodes_handler(
    TenantExtractor(tenant): TenantExtractor,
    Query(q): Query<GraphNodesQuery>,
) -> Result<Response, ApiError> {
    let limit = q.limit.unwrap_or(GRAPH_NODES_DEFAULT_LIMIT);
    let limit = limit.clamp(1, GRAPH_NODES_MAX_LIMIT);
    let kinds = parse_node_kind_filter(q.kind.as_deref())?;
    let since_ms = q.since_ms;
    let until_ms = q.until_ms;
    if let (Some(s), Some(u)) = (since_ms, until_ms) {
        if s > u {
            return Err(ApiError::bad_request(format!(
                "since_ms ({s}) must be <= until_ms ({u})"
            )));
        }
    }
    let cursor = match q.cursor.as_deref() {
        None => None,
        Some("") => None,
        Some(raw) => Some(decode_cursor::<NodesCursor>(raw)?),
    };
    let want_episode = kinds.contains(&NodeKind::Episode);
    let want_document = kinds.contains(&NodeKind::Document);
    let want_chunk = kinds.contains(&NodeKind::Chunk);
    let want_cluster = kinds.contains(&NodeKind::Cluster);
    let want_entity = kinds.contains(&NodeKind::Entity);

    // Over-fetch `limit + 2` per kind:
    //   * `+1` so the merge step can detect "more rows available beyond
    //     this page" → emits a `next_cursor` instead of None.
    //   * `+1` again because the SQL pre-filter `ts_ms <= cursor.ts_ms`
    //     can pull the previous page's last item back in; the post-merge
    //     cursor predicate drops it, costing one row of headroom.
    // The entity cap stays at GRAPH_ENTITY_CAP — entities are bounded
    // independently by the response cap, not the page limit.
    let per_kind_limit = (limit as i64).saturating_add(2);
    let tenant_id_for_blocking = tenant.tenant_id().to_string();
    let cursor_clone = cursor.as_ref().map(|c| NodesCursor {
        ts_ms: c.ts_ms,
        id: c.id.clone(),
    });

    let (mut staged, cap_reached) = tenant
        .read()
        .interact(move |conn| {
            let mut staged: Vec<StagingNode> = Vec::new();
            let mut cap_reached = false;
            let cursor_ref = cursor_clone.as_ref();

            if want_episode {
                let eps = fetch_episodes_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
                for ep in eps {
                    let id = format!("ep:{}", ep.memory_id);
                    let exp = ExpandedEpisode {
                        memory_id: ep.memory_id,
                        ts_ms: ep.ts_ms,
                        content: ep.content,
                    };
                    let node = graph_node_for_episode(&tenant_id_for_blocking, &exp);
                    staged.push(StagingNode {
                        sort_ts_ms: ep.ts_ms,
                        sort_id: id.clone(),
                        node,
                    });
                }
            }
            if want_document {
                let docs = fetch_documents_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
                for d in docs {
                    let id = format!("doc:{}", d.doc_id);
                    let exp = ExpandedDocument {
                        doc_id: d.doc_id,
                        title: d.title,
                        source: d.source,
                        ingested_at_ms: d.ingested_at_ms,
                    };
                    let node = graph_node_for_document(&tenant_id_for_blocking, &exp);
                    staged.push(StagingNode {
                        sort_ts_ms: d.ingested_at_ms,
                        sort_id: id.clone(),
                        node,
                    });
                }
            }
            if want_chunk {
                let chunks = fetch_chunks_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
                for c in chunks {
                    let id = format!("chunk:{}", c.chunk_id);
                    let exp = ExpandedChunk {
                        chunk_id: c.chunk_id,
                        chunk_index: c.chunk_index,
                        content: c.content,
                    };
                    // graph_node_for_chunk sets ts_ms = None for the
                    // wire format (chunks don't have a natural user-
                    // facing timestamp); but for sorting we use the
                    // row's created_at_ms.
                    let mut node = graph_node_for_chunk(&tenant_id_for_blocking, &exp);
                    node.ts_ms = Some(c.created_at_ms);
                    staged.push(StagingNode {
                        sort_ts_ms: c.created_at_ms,
                        sort_id: id.clone(),
                        node,
                    });
                }
            }
            if want_cluster {
                let cls = fetch_clusters_for_nodes(conn, since_ms, until_ms, cursor_ref, per_kind_limit)?;
                for c in cls {
                    let id = format!("cl:{}", c.cluster_id);
                    let node = graph_node_for_cluster(
                        &tenant_id_for_blocking,
                        &c.cluster_id,
                        c.abstraction.as_deref(),
                        c.created_at_ms,
                    );
                    staged.push(StagingNode {
                        sort_ts_ms: c.created_at_ms,
                        sort_id: id.clone(),
                        node,
                    });
                }
            }
            if want_entity {
                let (ents, was_cap_reached) =
                    fetch_entities_for_nodes(conn, since_ms, until_ms, cursor_ref)?;
                cap_reached = was_cap_reached;
                for e in ents {
                    let id = format!("ent:{}", e.value);
                    let mut node = graph_node_for_entity(&tenant_id_for_blocking, &e.value);
                    node.ts_ms = Some(e.first_seen_ms);
                    node.preview =
                        Some(format!("Referenced in {} triples", e.ref_count));
                    staged.push(StagingNode {
                        sort_ts_ms: e.first_seen_ms,
                        sort_id: id.clone(),
                        node,
                    });
                }
            }
            Ok::<_, rusqlite::Error>((staged, cap_reached))
        })
        .await
        .map_err(ApiError::from)?;

    // Apply cursor filter.
    if let Some(cur) = &cursor {
        staged.retain(|s| node_passes_cursor(s.sort_ts_ms, &s.sort_id, cur));
    }

    // Sort `ts_ms DESC, id ASC`.
    staged.sort_by(|a, b| {
        cmp_node_sort_keys((a.sort_ts_ms, &a.sort_id), (b.sort_ts_ms, &b.sort_id))
    });

    // Apply page limit + compute next_cursor.
    let limit_us = limit as usize;
    let next_cursor = if staged.len() > limit_us {
        let last = &staged[limit_us - 1];
        Some(NodesCursor {
            ts_ms: last.sort_ts_ms,
            id: last.sort_id.clone(),
        })
    } else {
        None
    };
    staged.truncate(limit_us);

    let next_cursor_str = match next_cursor {
        Some(c) => Some(encode_cursor(&c)?),
        None => None,
    };

    let nodes: Vec<GraphNode> = staged.into_iter().map(|s| s.node).collect();
    let payload = GraphNodesResponse {
        nodes,
        next_cursor: next_cursor_str,
    };

    // Attach the entity-cap header so clients can show truncation UX
    // without parsing the body.
    let mut response = Json(payload).into_response();
    if cap_reached {
        response
            .headers_mut()
            .insert(ENTITY_CAP_HEADER, HeaderValue::from_static("true"));
    }
    Ok(response)
}

// --- /v1/graph/edges --------------------------------------------------

/// Internal staging row for the edges endpoint: the wire-format edge plus
/// the `(kind_idx, sub_id)` key used for sorting and cursor comparison.
#[derive(Debug)]
struct StagingEdge {
    /// Wire-format edge that ends up in the response body.
    edge: GraphEdge,
    /// Primary sort key: `EdgeKind::order_idx()` of the edge's kind.
    kind_idx: u8,
    /// Secondary sort key within a kind: the `triple_id` for triple edges,
    /// the `chunk_id` for document_chunk edges, and `cluster_id` +
    /// U+001F + `memory_id` for cluster_member edges.
    sub_id: String,
}

/// Comparator for the canonical edge order: ascending kind index first,
/// then ascending sub-id within the same kind.
fn cmp_edge_sort_keys(a: (u8, &str), b: (u8, &str)) -> std::cmp::Ordering {
    let (a_kind, a_sub) = a;
    let (b_kind, b_sub) = b;
    a_kind.cmp(&b_kind).then_with(|| a_sub.cmp(b_sub))
}

/// Decide whether an edge keyed by `(kind_idx, sub_id)` belongs on a page
/// that follows `cursor`: it must sort strictly after the cursor position
/// under `kind_idx ASC, sub_id ASC`.
fn edge_passes_cursor(kind_idx: u8, sub_id: &str, cursor: &EdgesCursor) -> bool {
    let candidate = (kind_idx, sub_id);
    let boundary = (cursor.kind_idx, cursor.sub_id.as_str());
    matches!(
        cmp_edge_sort_keys(candidate, boundary),
        std::cmp::Ordering::Greater
    )
}

/// Test whether an edge of family `kind` touches the focus node given by
/// `(focus_kind, focus_value)`. Used to filter `?node_id=...` queries.
///
/// `src_value` and `tgt_value` are the bare (unprefixed) endpoint values
/// of the edge; `extra_value` is an optional third value that only the
/// triple/entity combination consults. Every (edge kind, focus kind)
/// pairing not listed below can never match.
///
/// | edge kind       | focus kind | matches when focus equals      |
/// |-----------------|------------|--------------------------------|
/// | `Triple`        | `Episode`  | source                         |
/// | `Triple`        | `Entity`   | target, extra, or source       |
/// | `DocumentChunk` | `Document` | source                         |
/// | `DocumentChunk` | `Chunk`    | target                         |
/// | `ClusterMember` | `Cluster`  | source                         |
/// | `ClusterMember` | `Episode`  | target                         |
fn edge_touches_focus(
    kind: EdgeKind,
    focus_kind: NodeKind,
    focus_value: &str,
    src_value: &str,
    tgt_value: &str,
    extra_value: Option<&str>,
) -> bool {
    let hits_source = src_value == focus_value;
    let hits_target = tgt_value == focus_value;
    match (kind, focus_kind) {
        (EdgeKind::Triple, NodeKind::Episode) => hits_source,
        (EdgeKind::Triple, NodeKind::Entity) => {
            hits_target || extra_value == Some(focus_value) || hits_source
        }
        (EdgeKind::DocumentChunk, NodeKind::Document) => hits_source,
        (EdgeKind::DocumentChunk, NodeKind::Chunk) => hits_target,
        (EdgeKind::ClusterMember, NodeKind::Cluster) => hits_source,
        (EdgeKind::ClusterMember, NodeKind::Episode) => hits_target,
        // Incompatible edge-family / focus-kind pairing.
        _ => false,
    }
}

/// One row of the triple query behind the edges endpoint
/// (`fetch_triple_edges`).
#[derive(Debug)]
struct EdgeRowTriple {
    /// `triples.triple_id`; used as the edge's within-kind sort key.
    triple_id: String,
    /// `episodes.memory_id` of the source episode, from a LEFT JOIN;
    /// `None` when no episode row matched.
    source_memory_id: Option<String>,
    /// `triples.object_id`; becomes the `ent:` target of the edge.
    object_id: String,
    /// `triples.predicate`.
    predicate: String,
    /// `triples.confidence`; surfaced as the edge weight.
    confidence: f32,
}

/// Load the raw rows behind `triple` edges: one row per active triple,
/// ordered by `triple_id`.
///
/// The source episode is attached with a LEFT JOIN, so a triple whose
/// `source_episode_id` matches no episode row comes back with
/// `source_memory_id = None`; this function does not drop such rows.
///
/// The scan is bounded at `GRAPH_EDGES_MAX_LIMIT * 4` rows to keep one
/// request's memory use bounded; the page limit is applied downstream.
fn fetch_triple_edges(conn: &rusqlite::Connection) -> rusqlite::Result<Vec<EdgeRowTriple>> {
    let row_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
    let mut stmt = conn.prepare(
        "SELECT t.triple_id, e.memory_id, t.object_id, t.predicate, t.confidence
           FROM triples t
           LEFT JOIN episodes e ON e.rowid = t.source_episode_id
          WHERE t.status = 'active'
          ORDER BY t.triple_id ASC
          LIMIT ?1",
    )?;
    let mapped = stmt.query_map(rusqlite::params![row_cap], |row| {
        Ok(EdgeRowTriple {
            triple_id: row.get(0)?,
            source_memory_id: row.get::<_, Option<String>>(1)?,
            object_id: row.get(2)?,
            predicate: row.get(3)?,
            confidence: row.get(4)?,
        })
    })?;
    let collected: rusqlite::Result<Vec<EdgeRowTriple>> = mapped.collect();
    collected
}

/// One row of the document→chunk query behind the edges endpoint
/// (`fetch_document_chunk_edges`).
#[derive(Debug)]
struct EdgeRowDocChunk {
    /// `document_chunks.chunk_id`; edge target and within-kind sort key.
    chunk_id: String,
    /// `document_chunks.doc_id`; edge source.
    doc_id: String,
}

/// Load the raw rows behind `document_chunk` edges: one row per chunk
/// whose parent document is active, ordered by `chunk_id`. The scan is
/// bounded at `GRAPH_EDGES_MAX_LIMIT * 4` rows.
fn fetch_document_chunk_edges(
    conn: &rusqlite::Connection,
) -> rusqlite::Result<Vec<EdgeRowDocChunk>> {
    let row_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
    let mut stmt = conn.prepare(
        "SELECT c.chunk_id, c.doc_id
           FROM document_chunks c
           JOIN documents d ON d.doc_id = c.doc_id
          WHERE d.status = 'active'
          ORDER BY c.chunk_id ASC
          LIMIT ?1",
    )?;
    let mapped = stmt.query_map(rusqlite::params![row_cap], |row| {
        Ok(EdgeRowDocChunk {
            chunk_id: row.get(0)?,
            doc_id: row.get(1)?,
        })
    })?;
    let collected: rusqlite::Result<Vec<EdgeRowDocChunk>> = mapped.collect();
    collected
}

/// One row of the cluster-membership query behind the edges endpoint
/// (`fetch_cluster_member_edges`).
#[derive(Debug)]
struct EdgeRowClusterMember {
    /// `cluster_episodes.cluster_id`; edge source.
    cluster_id: String,
    /// `cluster_episodes.memory_id`; edge target.
    memory_id: String,
}

/// Load the raw rows behind `cluster_member` edges: one row per
/// (cluster, episode) membership whose episode is active, ordered by
/// `cluster_id` then `memory_id`. The scan is bounded at
/// `GRAPH_EDGES_MAX_LIMIT * 4` rows.
fn fetch_cluster_member_edges(
    conn: &rusqlite::Connection,
) -> rusqlite::Result<Vec<EdgeRowClusterMember>> {
    let row_cap = (GRAPH_EDGES_MAX_LIMIT as i64) * 4;
    let mut stmt = conn.prepare(
        "SELECT ce.cluster_id, ce.memory_id
           FROM cluster_episodes ce
           JOIN episodes e ON e.memory_id = ce.memory_id
          WHERE e.status = 'active'
          ORDER BY ce.cluster_id ASC, ce.memory_id ASC
          LIMIT ?1",
    )?;
    let mapped = stmt.query_map(rusqlite::params![row_cap], |row| {
        Ok(EdgeRowClusterMember {
            cluster_id: row.get(0)?,
            memory_id: row.get(1)?,
        })
    })?;
    let collected: rusqlite::Result<Vec<EdgeRowClusterMember>> = mapped.collect();
    collected
}

/// `GET /v1/graph/edges`. Paginated edge catalog. See module-level
/// comments for the contract.
///
/// Steps:
///   1. Validate the query: clamp `limit`, parse the edge-type filter,
///      decode the optional cursor (an empty cursor string is treated as
///      absent), and parse the optional `node_id` focus.
///   2. On a reader connection, load the rows for each requested edge
///      kind, drop those that do not touch the focus node (if any), and
///      stage each edge with its `(kind_idx, sub_id)` sort key.
///   3. Drop edges at or before the cursor, sort `kind_idx ASC, sub_id
///      ASC`, cut to `limit`, and emit `next_cursor` (the key of the last
///      returned edge) when more edges were staged than fit on the page.
///
/// Errors: 400 for an invalid type filter, cursor, or `node_id`; database
/// failures are converted through `ApiError::from`.
async fn graph_edges_handler(
    TenantExtractor(tenant): TenantExtractor,
    Query(q): Query<GraphEdgesQuery>,
) -> Result<Json<GraphEdgesResponse>, ApiError> {
    let limit = q.limit.unwrap_or(GRAPH_EDGES_DEFAULT_LIMIT);
    let limit = limit.clamp(1, GRAPH_EDGES_MAX_LIMIT);
    let kinds = parse_edge_kind_filter(q.r#type.as_deref())?;
    let cursor = match q.cursor.as_deref() {
        None => None,
        Some("") => None,
        Some(raw) => Some(decode_cursor::<EdgesCursor>(raw)?),
    };

    // Optional focus node: (kind, bare value after the prefix).
    let focus = match q.node_id.as_deref() {
        None => None,
        Some(raw) => {
            let (kind, value) = parse_node_id(raw)?;
            Some((kind, value.to_string()))
        }
    };

    let want_triple = kinds.contains(&EdgeKind::Triple);
    let want_doc_chunk = kinds.contains(&EdgeKind::DocumentChunk);
    let want_cluster_member = kinds.contains(&EdgeKind::ClusterMember);

    let staged: Vec<StagingEdge> = tenant
        .read()
        .interact(move |conn| {
            let mut staged: Vec<StagingEdge> = Vec::new();

            if want_triple {
                for t in fetch_triple_edges(conn)? {
                    let src_id = match &t.source_memory_id {
                        Some(mid) => format!("ep:{mid}"),
                        None => continue, // orphan triple — skip
                    };
                    let tgt_id = format!("ent:{}", t.object_id);
                    if let Some((fk, fv)) = &focus {
                        // `src_value` for matching is the bare memory_id
                        // (after the `ep:` prefix); `tgt_value` is the
                        // bare entity value.
                        if !edge_touches_focus(
                            EdgeKind::Triple,
                            *fk,
                            fv,
                            // Always `Some` here: the `None` case hit
                            // `continue` above.
                            t.source_memory_id
                                .as_deref()
                                .unwrap_or(""),
                            &t.object_id,
                            // NOTE(review): the emitted edge only goes
                            // ep → ent(object), and the fetched row does
                            // not carry the triple's subject_id, so the
                            // `extra` slot is always `None`. An entity
                            // focus therefore never matches on a triple's
                            // subject — confirm whether that is intended.
                            None,
                        ) {
                            continue;
                        }
                    }
                    let edge = GraphEdge {
                        id: edge_id(&src_id, "triple", &tgt_id),
                        source: src_id,
                        target: tgt_id,
                        kind: "triple",
                        predicate: Some(t.predicate),
                        weight: Some(t.confidence),
                    };
                    staged.push(StagingEdge {
                        edge,
                        kind_idx: EdgeKind::Triple.order_idx(),
                        sub_id: t.triple_id,
                    });
                }
            }
            if want_doc_chunk {
                for dc in fetch_document_chunk_edges(conn)? {
                    let src_id = format!("doc:{}", dc.doc_id);
                    let tgt_id = format!("chunk:{}", dc.chunk_id);
                    if let Some((fk, fv)) = &focus {
                        if !edge_touches_focus(
                            EdgeKind::DocumentChunk,
                            *fk,
                            fv,
                            &dc.doc_id,
                            &dc.chunk_id,
                            None,
                        ) {
                            continue;
                        }
                    }
                    let edge = GraphEdge {
                        id: edge_id(&src_id, "document_chunk", &tgt_id),
                        source: src_id,
                        target: tgt_id,
                        kind: "document_chunk",
                        predicate: None,
                        weight: None,
                    };
                    staged.push(StagingEdge {
                        edge,
                        kind_idx: EdgeKind::DocumentChunk.order_idx(),
                        sub_id: dc.chunk_id,
                    });
                }
            }
            if want_cluster_member {
                for cm in fetch_cluster_member_edges(conn)? {
                    let src_id = format!("cl:{}", cm.cluster_id);
                    let tgt_id = format!("ep:{}", cm.memory_id);
                    if let Some((fk, fv)) = &focus {
                        if !edge_touches_focus(
                            EdgeKind::ClusterMember,
                            *fk,
                            fv,
                            &cm.cluster_id,
                            &cm.memory_id,
                            None,
                        ) {
                            continue;
                        }
                    }
                    let edge = GraphEdge {
                        id: edge_id(&src_id, "cluster_member", &tgt_id),
                        source: src_id,
                        target: tgt_id,
                        kind: "cluster_member",
                        predicate: None,
                        weight: None,
                    };
                    // Composite within-kind key: cluster id and memory id
                    // joined by U+001F (unit separator).
                    let sub_id = format!("{}\u{1f}{}", cm.cluster_id, cm.memory_id);
                    staged.push(StagingEdge {
                        edge,
                        kind_idx: EdgeKind::ClusterMember.order_idx(),
                        sub_id,
                    });
                }
            }
            Ok::<_, rusqlite::Error>(staged)
        })
        .await
        .map_err(ApiError::from)?;

    // Apply cursor filter: keep only edges strictly after the cursor.
    let mut staged = staged;
    if let Some(cur) = &cursor {
        staged.retain(|s| edge_passes_cursor(s.kind_idx, &s.sub_id, cur));
    }

    // Sort `(kind_idx ASC, sub_id ASC)` — stable, simple.
    staged.sort_by(|a, b| {
        cmp_edge_sort_keys((a.kind_idx, &a.sub_id), (b.kind_idx, &b.sub_id))
    });

    // `limit` is clamped to at least 1 above, so `limit_us - 1` cannot
    // underflow. The cursor records the last edge that stays on the page.
    let limit_us = limit as usize;
    let next_cursor = if staged.len() > limit_us {
        let last = &staged[limit_us - 1];
        Some(EdgesCursor {
            kind_idx: last.kind_idx,
            sub_id: last.sub_id.clone(),
        })
    } else {
        None
    };
    staged.truncate(limit_us);
    let next_cursor_str = match next_cursor {
        Some(c) => Some(encode_cursor(&c)?),
        None => None,
    };

    let edges: Vec<GraphEdge> = staged.into_iter().map(|s| s.edge).collect();
    Ok(Json(GraphEdgesResponse {
        edges,
        next_cursor: next_cursor_str,
    }))
}

// ---------------------------------------------------------------------------
// Graph inspect — kind-discriminated full-record drill (v0.10.0)
//
// `GET /v1/graph/inspect/{id}` powers solo-web's right-side inspector
// panel. Path `id` carries the prefixed node identifier (ep:/doc:/chunk:/
// cl:/ent:); the handler dispatches per-kind and returns the same wire
// shape solo-web's `InspectResponse` expects: `{ node, full_text?,
// triples_in[], triples_out[] }`.
//
// Per-kind contract (v0.10.0 P1):
//   * `ep:<memory_id>`     full_text = episodes.content (untruncated),
//                          triples_in = [],
//                          triples_out = triples WHERE source_episode_id = rowid
//                          (one edge per triple, ep -> ent(object), predicate
//                          + weight surfaced). Episodes never appear as triple
//                          subjects/objects, so triples_in is structurally
//                          empty.
//   * `doc:<doc_id>`       full_text = concatenated chunk bodies separated by
//                          "\n\n" (no `documents.full_text` column exists; the
//                          chunks-concat path produces the same final text the
//                          ingester chunked from). triples_in/out = [] --
//                          documents don't directly carry triples; their
//                          chunks transitively do, but the inspector reaches
//                          those via the existing `/v1/graph/expand` drill.
//   * `chunk:<chunk_id>`   full_text = document_chunks.content,
//                          triples_in/out = [] (chunks aren't triple endpoints).
//   * `cl:<cluster_id>`    full_text = label + "\n\n" + abstraction
//                          (`semantic_abstractions.content`) when an
//                          abstraction exists; just the label otherwise.
//                          triples_in/out = [].
//   * `ent:<value>`        full_text = None (entities have no body),
//                          triples_in = [],
//                          triples_out = all triples where the entity appears
//                          as subject OR object. Capped at
//                          `GRAPH_INSPECT_ENTITY_TRIPLES_CAP` (50). Entities
//                          are synthetic -- an `ent:<value>` with zero triples
//                          in the tenant returns 404 (the entity exists only
//                          if at least one triple references it).
//
// Error semantics: 404 if the prefixed id has no row in the tenant's DB.
// 400 if the prefix is unknown or the body after `:` is empty (reuses
// `parse_node_id`). Tenant + auth are handled by the existing extractors.
//
// Lesson #30: no audit emit. Inspect is a derived read over already-
// audited primitives.
// ---------------------------------------------------------------------------

/// Maximum number of triples returned when inspecting an entity node.
/// Entities can be heavily referenced ("user", "Alice"); the inspector
/// panel only needs enough for orientation. The
/// `/v1/graph/expand?kind=triple` path delivers the paginated full set
/// when the UI needs more.
const GRAPH_INSPECT_ENTITY_TRIPLES_CAP: i64 = 50;

/// JSON body returned by `GET /v1/graph/inspect/{id}`.
#[derive(Debug, Serialize)]
struct GraphInspectResponse {
    /// The inspected node itself.
    node: GraphNode,
    /// Body text of the node. Left out of the serialized JSON entirely when
    /// `None` (e.g. entities, or documents without chunks).
    #[serde(skip_serializing_if = "Option::is_none")]
    full_text: Option<String>,
    /// Incoming triple edges. Every per-kind inspect path in this section
    /// currently returns this empty.
    triples_in: Vec<GraphEdge>,
    /// Outgoing triple edges; populated for episode and entity nodes.
    triples_out: Vec<GraphEdge>,
}

/// `GET /v1/graph/inspect/{id}`. See module-level comments.
async fn graph_inspect_handler(
    TenantExtractor(tenant): TenantExtractor,
    Path(id): Path<String>,
) -> Result<Json<GraphInspectResponse>, ApiError> {
    let (kind, value) = parse_node_id(&id)?;
    let tenant_id_str = tenant.tenant_id().to_string();
    let value = value.to_string();
    let node_id_full = id;
    match kind {
        NodeKind::Episode => {
            inspect_episode_node(&tenant, &tenant_id_str, value, node_id_full).await
        }
        NodeKind::Document => {
            inspect_document_node(&tenant, &tenant_id_str, value, node_id_full).await
        }
        NodeKind::Chunk => {
            inspect_chunk_node(&tenant, &tenant_id_str, value, node_id_full).await
        }
        NodeKind::Cluster => {
            inspect_cluster_node(&tenant, &tenant_id_str, value, node_id_full).await
        }
        NodeKind::Entity => {
            inspect_entity_node(&tenant, &tenant_id_str, value, node_id_full).await
        }
    }
    .map(Json)
}

// ---- per-kind paths ----

async fn inspect_episode_node(
    tenant: &TenantHandle,
    tenant_id: &str,
    memory_id: String,
    node_id_full: String,
) -> Result<GraphInspectResponse, ApiError> {
    let memory_id_for_err = memory_id.clone();
    let memory_id_q = memory_id.clone();
    // Fetch the episode row + all triples sourced from it in one
    // interact() call to keep the connection check-out short.
    let fetched: Option<(ExpandedEpisode, Vec<TripleRow>)> = tenant
        .read()
        .interact(move |conn| {
            let ep_row: Option<(i64, i64, String)> = conn
                .query_row(
                    "SELECT rowid, ts_ms, content
                       FROM episodes
                      WHERE memory_id = ?1
                        AND status = 'active'",
                    rusqlite::params![&memory_id_q],
                    |r| {
                        Ok((
                            r.get::<_, i64>(0)?,
                            r.get::<_, i64>(1)?,
                            r.get::<_, String>(2)?,
                        ))
                    },
                )
                .map(Some)
                .or_else(|e| match e {
                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
                    other => Err(other),
                })?;
            let Some((rowid, ts_ms, content)) = ep_row else {
                return Ok(None);
            };
            let mut stmt = conn.prepare(
                "SELECT subject_id, predicate, object_id, confidence
                   FROM triples
                  WHERE source_episode_id = ?1
                    AND status = 'active'
                  ORDER BY valid_from_ms DESC",
            )?;
            let triples = stmt
                .query_map(rusqlite::params![rowid], |r| {
                    Ok(TripleRow {
                        subject_id: r.get(0)?,
                        predicate: r.get(1)?,
                        object_id: r.get(2)?,
                        confidence: r.get(3)?,
                    })
                })?
                .collect::<rusqlite::Result<Vec<_>>>()?;
            let ep = ExpandedEpisode {
                memory_id: memory_id_q,
                ts_ms,
                content,
            };
            Ok::<_, rusqlite::Error>(Some((ep, triples)))
        })
        .await
        .map_err(ApiError::from)?;

    let (ep, triples) = fetched.ok_or_else(|| {
        ApiError::not_found(format!(
            "node_id {node_id_full:?} (memory_id {memory_id_for_err}) not found in current tenant"
        ))
    })?;

    let node = graph_node_for_episode(tenant_id, &ep);
    let full_text = Some(ep.content.clone());
    // Triples flow from this episode (the source) to entity endpoints.
    // Emit one edge per triple: ep -> ent(object), predicate from the
    // triple, weight = confidence. This mirrors the `/v1/graph/edges`
    // triple-edge convention so the renderer can dedupe via composite id.
    let mut triples_out = Vec::with_capacity(triples.len());
    for t in triples {
        let tgt_id = format!("ent:{}", t.object_id);
        triples_out.push(GraphEdge {
            id: edge_id(&node_id_full, "triple", &tgt_id),
            source: node_id_full.clone(),
            target: tgt_id,
            kind: "triple",
            predicate: Some(t.predicate),
            weight: Some(t.confidence),
        });
    }
    Ok(GraphInspectResponse {
        node,
        full_text,
        triples_in: Vec::new(),
        triples_out,
    })
}

async fn inspect_document_node(
    tenant: &TenantHandle,
    tenant_id: &str,
    doc_id: String,
    node_id_full: String,
) -> Result<GraphInspectResponse, ApiError> {
    let doc_id_for_err = doc_id.clone();
    let doc_id_q = doc_id.clone();
    // Fetch the document row + all chunk bodies (ORDER BY chunk_index) in
    // one interact() call. The chunks-concat path is the source of full_text
    // since the `documents` table doesn't carry the original raw text. For
    // v0.10.0 P1 we concatenate every chunk; pagination is the inspector
    // panel's responsibility if the document is very large.
    let fetched: Option<(ExpandedDocument, Vec<String>)> = tenant
        .read()
        .interact(move |conn| {
            let doc_row: Option<ExpandedDocument> = conn
                .query_row(
                    "SELECT doc_id, title, source, ingested_at_ms
                       FROM documents
                      WHERE doc_id = ?1
                        AND status = 'active'",
                    rusqlite::params![&doc_id_q],
                    |r| {
                        Ok(ExpandedDocument {
                            doc_id: r.get(0)?,
                            title: r.get(1)?,
                            source: r.get(2)?,
                            ingested_at_ms: r.get(3)?,
                        })
                    },
                )
                .map(Some)
                .or_else(|e| match e {
                    rusqlite::Error::QueryReturnedNoRows => Ok(None),
                    other => Err(other),
                })?;
            let Some(doc) = doc_row else {
                return Ok(None);
            };
            let mut stmt = conn.prepare(
                "SELECT content
                   FROM document_chunks
                  WHERE doc_id = ?1
                  ORDER BY chunk_index ASC",
            )?;
            let chunks = stmt
                .query_map(rusqlite::params![&doc_id_q], |r| r.get::<_, String>(0))?
                .collect::<rusqlite::Result<Vec<_>>>()?;
            Ok::<_, rusqlite::Error>(Some((doc, chunks)))
        })
        .await
        .map_err(ApiError::from)?;

    let (doc, chunks) = fetched.ok_or_else(|| {
        ApiError::not_found(format!(
            "node_id {node_id_full:?} (doc_id {doc_id_for_err}) not found in current tenant"
        ))
    })?;

    let full_text = if chunks.is_empty() {
        // Document with zero chunks (e.g. mid-ingest, or an empty source).
        // Return None to signal "no body available" rather than an empty
        // string -- saves the renderer a degenerate code path.
        None
    } else {
        Some(chunks.join("\n\n"))
    };

    Ok(GraphInspectResponse {
        node: graph_node_for_document(tenant_id, &doc),
        full_text,
        triples_in: Vec::new(),
        triples_out: Vec::new(),
    })
}

async fn inspect_chunk_node(
    tenant: &TenantHandle,
    tenant_id: &str,
    chunk_id: String,
    node_id_full: String,
) -> Result<GraphInspectResponse, ApiError> {
    let chunk_id_for_err = chunk_id.clone();
    let chunk_id_q = chunk_id.clone();
    let row: Option<(ExpandedChunk, i64)> = tenant
        .read()
        .interact(move |conn| {
            conn.query_row(
                "SELECT c.chunk_id, c.chunk_index, c.content, c.created_at_ms
                   FROM document_chunks c
                   JOIN documents d ON d.doc_id = c.doc_id
                  WHERE c.chunk_id = ?1
                    AND d.status = 'active'",
                rusqlite::params![&chunk_id_q],
                |r| {
                    Ok((
                        ExpandedChunk {
                            chunk_id: r.get(0)?,
                            chunk_index: r.get(1)?,
                            content: r.get(2)?,
                        },
                        r.get::<_, i64>(3)?,
                    ))
                },
            )
            .map(Some)
            .or_else(|e| match e {
                rusqlite::Error::QueryReturnedNoRows => Ok(None),
                other => Err(other),
            })
        })
        .await
        .map_err(ApiError::from)?;

    let (chunk, created_at_ms) = row.ok_or_else(|| {
        ApiError::not_found(format!(
            "node_id {node_id_full:?} (chunk_id {chunk_id_for_err}) not found in current tenant"
        ))
    })?;

    let full_text = Some(chunk.content.clone());
    let mut node = graph_node_for_chunk(tenant_id, &chunk);
    // Mirror the `/v1/graph/nodes` chunk-row behaviour: surface
    // `created_at_ms` so the inspector panel has a sortable timestamp.
    node.ts_ms = Some(created_at_ms);

    Ok(GraphInspectResponse {
        node,
        full_text,
        triples_in: Vec::new(),
        triples_out: Vec::new(),
    })
}

async fn inspect_cluster_node(
    tenant: &TenantHandle,
    tenant_id: &str,
    cluster_id: String,
    node_id_full: String,
) -> Result<GraphInspectResponse, ApiError> {
    let cluster_id_for_err = cluster_id.clone();
    let cluster_id_q = cluster_id.clone();
    let row: Option<(Option<String>, i64)> = tenant
        .read()
        .interact(move |conn| {
            conn.query_row(
                "SELECT sa.content, c.created_at_ms
                   FROM clusters c
                   LEFT JOIN semantic_abstractions sa ON sa.cluster_id = c.cluster_id
                  WHERE c.cluster_id = ?1",
                rusqlite::params![&cluster_id_q],
                |r| Ok((r.get::<_, Option<String>>(0)?, r.get::<_, i64>(1)?)),
            )
            .map(Some)
            .or_else(|e| match e {
                rusqlite::Error::QueryReturnedNoRows => Ok(None),
                other => Err(other),
            })
        })
        .await
        .map_err(ApiError::from)?;

    let (abstraction, created_at_ms) = row.ok_or_else(|| {
        ApiError::not_found(format!(
            "node_id {node_id_full:?} (cluster_id {cluster_id_for_err}) not found in current tenant"
        ))
    })?;

    // full_text is "<cluster_id label>\n\n<abstraction>" when an abstraction
    // exists; just the label otherwise. Brief "cluster" -- the cluster
    // label is `clusters.cluster_id` (the user-facing label is the
    // abstraction; clusters don't have a `label` column).
    let full_text = match abstraction.as_deref() {
        Some(a) => Some(format!("cluster {cluster_id_for_err}\n\n{a}")),
        None => Some(format!("cluster {cluster_id_for_err}")),
    };

    Ok(GraphInspectResponse {
        node: graph_node_for_cluster(
            tenant_id,
            &cluster_id_for_err,
            abstraction.as_deref(),
            created_at_ms,
        ),
        full_text,
        triples_in: Vec::new(),
        triples_out: Vec::new(),
    })
}

async fn inspect_entity_node(
    tenant: &TenantHandle,
    tenant_id: &str,
    entity_value: String,
    node_id_full: String,
) -> Result<GraphInspectResponse, ApiError> {
    // Entities are synthetic. They "exist" only if at least one triple
    // references them as subject or object. Zero triples -> 404 per brief.
    let entity_q = entity_value.clone();
    let rows: Vec<TripleRow> = tenant
        .read()
        .interact(move |conn| {
            let mut stmt = conn.prepare(
                "SELECT subject_id, predicate, object_id, confidence
                   FROM triples
                  WHERE (subject_id = ?1 OR object_id = ?1)
                    AND status = 'active'
                  ORDER BY valid_from_ms DESC
                  LIMIT ?2",
            )?;
            stmt.query_map(
                rusqlite::params![&entity_q, GRAPH_INSPECT_ENTITY_TRIPLES_CAP],
                |r| {
                    Ok(TripleRow {
                        subject_id: r.get(0)?,
                        predicate: r.get(1)?,
                        object_id: r.get(2)?,
                        confidence: r.get(3)?,
                    })
                },
            )?
            .collect::<rusqlite::Result<Vec<_>>>()
        })
        .await
        .map_err(ApiError::from)?;

    if rows.is_empty() {
        return Err(ApiError::not_found(format!(
            "node_id {node_id_full:?} (entity {entity_value:?}) not found in current tenant -- entities must be referenced by at least one triple to be inspectable"
        )));
    }

    // Triples flow out FROM the entity to its counterpart. For each row
    // determine which side the entity appears on and emit ent:<self> ->
    // ent:<other>. Brief calls these triples_out (entities don't have
    // structural triples_in in v0.10.0 P1).
    let mut triples_out = Vec::with_capacity(rows.len());
    for t in rows {
        let other = if t.subject_id == entity_value {
            t.object_id
        } else {
            // entity_value matched on object_id; counterpart is subject.
            t.subject_id
        };
        let tgt_id = format!("ent:{other}");
        triples_out.push(GraphEdge {
            id: edge_id(&node_id_full, "triple", &tgt_id),
            source: node_id_full.clone(),
            target: tgt_id,
            kind: "triple",
            predicate: Some(t.predicate),
            weight: Some(t.confidence),
        });
    }

    Ok(GraphInspectResponse {
        node: graph_node_for_entity(tenant_id, &entity_value),
        full_text: None,
        triples_in: Vec::new(),
        triples_out,
    })
}

// ---------------------------------------------------------------------------
// Graph neighbors -- unified explicit + HNSW-semantic (v0.10.0)
//
// `GET /v1/graph/neighbors/{id}` powers solo-web's "show similar" overlay.
// Returns the same `GraphResponse { nodes, edges }` envelope as the rest of
// the family, combining:
//
//   * Explicit edges (triples / document_chunk / cluster_member) incident
//     to the focal node -- the same shape `/v1/graph/expand` produces for
//     a given (node_id, edge_kind) pair, but UNIONed across every edge kind
//     compatible with the focal node's kind.
//
//   * HNSW-semantic edges (cosine-similarity neighbors) -- only valid for
//     `ep:` (episodes) and `chunk:` (chunks); other source kinds return
//     400 when `kind=semantic` is requested alone, or are silently skipped
//     when `kind=both` is requested (explicit-only path still runs).
//
// Why this isn't just expand-with-a-flag: `/v1/graph/expand` takes a
// specific `kind=<edge-kind>` parameter and expands along ONE edge kind at
// a time. `/v1/graph/neighbors/:id` UNIFIES all compatible edge kinds
// incident to the focal node into one response. Different UX (drill vs.
// overview); different API; both needed.
//
// ## Refactor decision
//
// The brief recommends extracting `expand`'s per-kind helpers into a
// shared module. In practice the `expand_*` async fns already do exactly
// what neighbors needs for the explicit path (same response shape, same
// tenant + auth + existence semantics). To keep the change surgical and
// to preserve `expand`'s existing tests byte-for-byte, neighbors **reuses
// the existing `expand_*` async fns directly** rather than refactoring
// their bodies. The explicit path is a thin orchestrator that calls every
// `expand_*` fn compatible with the focal node's kind and concatenates
// the results.
//
// ## Dedup rule (kind=both)
//
// When an edge with the same (source, target) appears in BOTH the
// explicit and the semantic result sets, the explicit edge wins -- the
// semantic edge is dropped. We dedupe by `(source, target)` (NOT by full
// edge id, which encodes the kind too): the rule "explicit beats
// semantic" only makes sense when both endpoints agree, regardless of
// kind. In practice this is most likely to fire when an entity-focused
// expand (which surfaces episodes as triple-targets) collides with a
// semantic search hit on the same episode pair.
//
// ## Limit policy
//
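// `limit` is applied PER EDGE KIND, not to the response as a whole: the
// semantic path and each explicit edge kind compatible with the focal node
// are queried with the same `limit`. With `limit=25` and `kind=both`, an
// episode (two explicit edge kinds: cluster_member + triple) is asked for
// up to 25 + 25 explicit and 25 semantic edges (minus dedupe). `limit` is
// silently clamped to [1, 100] (the ceiling matches the rest of the
// `/v1/graph/*` family).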
//
// ## Threshold filter
//
// `threshold` (default 0.75) filters semantic neighbors by
// `weight >= threshold`, where `weight = (1 - cos_distance).max(0)`. The
// default is conservative -- below 0.75 the renderer typically shows too
// many spurious edges for a useful "show similar" overlay. Callers can
// dial down (e.g. `?threshold=0.5`) for a broader view.
//
// See `docs/dev-log/0116-graph-neighbors-impl.md` for the design notes.
// ---------------------------------------------------------------------------

/// Default page size when the caller omits `?limit=`. Conservative so the
/// "show similar" overlay isn't visually overwhelming on first click.
const GRAPH_NEIGHBORS_DEFAULT_LIMIT: u32 = 25;
/// Silent clamp ceiling. Matches the rest of the `/v1/graph/*` family.
/// The handler clamps the requested limit to `[1, GRAPH_NEIGHBORS_MAX_LIMIT]`,
/// so `?limit=0` is raised to 1 rather than rejected.
const GRAPH_NEIGHBORS_MAX_LIMIT: u32 = 100;
/// Conservative similarity floor. Edges with `weight < threshold` are
/// dropped from the semantic result set. Used when the caller omits
/// `?threshold=`; caller-supplied values must lie in `[0.0, 1.0]`.
const GRAPH_NEIGHBORS_DEFAULT_THRESHOLD: f32 = 0.75;

/// Discriminator for which neighbor kinds the caller wants. Default is
/// `both` (explicit edges + HNSW-semantic).
///
/// Deserialized in snake_case, so the accepted `?kind=` values are
/// `explicit`, `semantic` and `both`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
#[serde(rename_all = "snake_case")]
enum GraphNeighborsKind {
    /// Only the explicit-edge path runs.
    Explicit,
    /// Only the semantic path runs.
    Semantic,
    /// Both paths run and their results are merged.
    #[default]
    Both,
}

/// Query-string parameters of `GET /v1/graph/neighbors/{id}`. Every field
/// is optional; the handler substitutes the defaults noted below.
#[derive(Debug, Deserialize)]
struct GraphNeighborsQuery {
    /// Which neighbor paths to run. `None` falls back to
    /// `GraphNeighborsKind::default()` (`Both`).
    #[serde(default)]
    kind: Option<GraphNeighborsKind>,
    /// Minimum semantic edge weight. `None` falls back to
    /// `GRAPH_NEIGHBORS_DEFAULT_THRESHOLD`; the handler rejects values
    /// outside `[0.0, 1.0]`.
    #[serde(default)]
    threshold: Option<f32>,
    /// Requested page size. `None` falls back to
    /// `GRAPH_NEIGHBORS_DEFAULT_LIMIT`; the handler clamps the value to
    /// `[1, GRAPH_NEIGHBORS_MAX_LIMIT]`.
    #[serde(default)]
    limit: Option<u32>,
}

/// `GET /v1/graph/neighbors/{id}`. See module-level comments.
async fn graph_neighbors_handler(
    TenantExtractor(tenant): TenantExtractor,
    Path(id): Path<String>,
    Query(q): Query<GraphNeighborsQuery>,
) -> Result<Json<GraphExpandResponse>, ApiError> {
    let kind = q.kind.unwrap_or_default();
    let threshold = q.threshold.unwrap_or(GRAPH_NEIGHBORS_DEFAULT_THRESHOLD);
    if !(0.0..=1.0).contains(&threshold) {
        return Err(ApiError::bad_request(format!(
            "threshold must be in [0.0, 1.0]; got {threshold}"
        )));
    }
    // Silent clamp at GRAPH_NEIGHBORS_MAX_LIMIT -- matches expand /
    // nodes / edges convention. Test `neighbors_limit_clamped_at_100`
    // locks in the clamp policy.
    let limit_raw = q.limit.unwrap_or(GRAPH_NEIGHBORS_DEFAULT_LIMIT);
    let limit = limit_raw.clamp(1, GRAPH_NEIGHBORS_MAX_LIMIT);

    let (node_kind, value) = parse_node_id(&id)?;
    let value_owned = value.to_string();
    let tenant_id_str = tenant.tenant_id().to_string();
    let node_id_full = id;

    // Existence probe for the focal node. The explicit + semantic paths
    // each handle "node-found-but-zero-neighbors" gracefully (200 with
    // empty arrays) -- but we want a true 404 when the id resolves to no
    // row at all, regardless of which kind the caller asked for. This
    // matches the inspect endpoint's gate: a node has to exist to be
    // meaningfully "neighborable".
    ensure_neighbors_focal_exists(&tenant, node_kind, &value_owned, &node_id_full).await?;

    // Dispatch.
    let (explicit_nodes, explicit_edges) = if matches!(
        kind,
        GraphNeighborsKind::Explicit | GraphNeighborsKind::Both
    ) {
        neighbors_explicit(
            &tenant,
            &tenant_id_str,
            node_kind,
            &value_owned,
            &node_id_full,
            limit as i64,
        )
        .await?
    } else {
        (Vec::new(), Vec::new())
    };

    let (semantic_nodes, semantic_edges) = if matches!(
        kind,
        GraphNeighborsKind::Semantic | GraphNeighborsKind::Both
    ) {
        match neighbors_semantic(
            &tenant,
            &tenant_id_str,
            node_kind,
            &value_owned,
            &node_id_full,
            limit,
            threshold,
        )
        .await
        {
            Ok(parts) => parts,
            Err(e) => {
                // `kind=semantic` alone against an unsupported focal node
                // (doc/cl/ent) is a hard 400 -- the caller asked for ONLY
                // semantic neighbors and there are none possible.
                //
                // `kind=both` against an unsupported focal node silently
                // skips the semantic step; the explicit path still
                // delivers a meaningful answer. This mirrors the
                // pragmatic UX: clicking "show similar" on an entity
                // still surfaces the entity's triples without surfacing a
                // pointless error.
                if matches!(kind, GraphNeighborsKind::Semantic) {
                    return Err(e);
                }
                (Vec::new(), Vec::new())
            }
        }
    } else {
        (Vec::new(), Vec::new())
    };

    // Merge + dedupe. Explicit edges win over semantic edges with the
    // same (source, target). Nodes dedupe by id.
    let mut explicit_endpoints: std::collections::HashSet<(String, String)> =
        std::collections::HashSet::with_capacity(explicit_edges.len());
    for e in &explicit_edges {
        explicit_endpoints.insert((e.source.clone(), e.target.clone()));
    }

    let mut nodes: Vec<GraphNode> = Vec::with_capacity(explicit_nodes.len() + semantic_nodes.len());
    let mut edges: Vec<GraphEdge> =
        Vec::with_capacity(explicit_edges.len() + semantic_edges.len());
    let mut seen_node_ids: std::collections::HashSet<String> =
        std::collections::HashSet::with_capacity(explicit_nodes.len() + semantic_nodes.len());

    for n in explicit_nodes {
        if seen_node_ids.insert(n.id.clone()) {
            nodes.push(n);
        }
    }
    for e in explicit_edges {
        edges.push(e);
    }
    for n in semantic_nodes {
        if seen_node_ids.insert(n.id.clone()) {
            nodes.push(n);
        }
    }
    for e in semantic_edges {
        if explicit_endpoints.contains(&(e.source.clone(), e.target.clone())) {
            // Explicit edge already covers this pair -- drop the semantic
            // duplicate per the dedup rule. The semantic node may still
            // remain in `nodes` if no other edge already pulled it in;
            // that's fine -- the renderer renders nodes with weight-less
            // structural edges either way.
            continue;
        }
        edges.push(e);
    }

    Ok(Json(GraphExpandResponse { nodes, edges }))
}

/// Gatekeeper for the neighbors endpoint: confirms the focal node exists
/// before any neighbor lookup runs.
///
/// Forwards to the existence check for the node's kind and returns that
/// check's result unchanged, so a missing node surfaces as an error instead
/// of a response with empty arrays. Entities have no dedicated check of
/// their own kind; they go through `ensure_entity_referenced`.
async fn ensure_neighbors_focal_exists(
    tenant: &TenantHandle,
    node_kind: NodeKind,
    value: &str,
    node_id_full: &str,
) -> Result<(), ApiError> {
    match node_kind {
        NodeKind::Episode => ensure_episode_exists(tenant, value, node_id_full).await,
        NodeKind::Document => ensure_document_exists(tenant, value, node_id_full).await,
        NodeKind::Chunk => ensure_chunk_exists(tenant, value, node_id_full).await,
        NodeKind::Cluster => ensure_cluster_exists(tenant, value, node_id_full).await,
        NodeKind::Entity => ensure_entity_referenced(tenant, value, node_id_full).await,
    }
}

/// Existence check for a chunk node.
///
/// Counts rows in `document_chunks` with this `chunk_id` whose parent
/// document is active. A count of zero yields `ApiError::not_found`;
/// anything else is `Ok(())`. Failures of the read itself are converted
/// with `ApiError::from`.
async fn ensure_chunk_exists(
    tenant: &TenantHandle,
    chunk_id: &str,
    node_id_full: &str,
) -> Result<(), ApiError> {
    // The closure must own its input, so copy the id.
    let needle = chunk_id.to_owned();
    let matching_rows: i64 = tenant
        .read()
        .interact(move |conn| {
            conn.query_row(
                "SELECT COUNT(*)
                   FROM document_chunks c
                   JOIN documents d ON d.doc_id = c.doc_id
                  WHERE c.chunk_id = ?1
                    AND d.status = 'active'",
                rusqlite::params![&needle],
                |row| row.get(0),
            )
        })
        .await
        .map_err(ApiError::from)?;

    match matching_rows {
        0 => Err(ApiError::not_found(format!(
            "node_id {node_id_full:?} not found in current tenant"
        ))),
        _ => Ok(()),
    }
}

/// 404 if the entity isn't referenced by at least one active triple in
/// the tenant. Matches the inspect-entity 404 contract: entities are
/// synthetic, "existence" is "shows up in at least one triple".
///
/// The probe is `SELECT EXISTS(...)` rather than `COUNT(*)`: only
/// non-emptiness matters, and `EXISTS` lets SQLite stop at the first
/// matching triple instead of visiting every triple of a heavily
/// referenced entity.
///
/// # Errors
///
/// `ApiError::not_found` when no active triple names the entity as subject
/// or object; failures of the read itself are converted with
/// `ApiError::from`.
async fn ensure_entity_referenced(
    tenant: &TenantHandle,
    entity_value: &str,
    node_id_full: &str,
) -> Result<(), ApiError> {
    let entity_q = entity_value.to_string();
    // EXISTS evaluates to integer 0 or 1, which rusqlite reads as `bool`.
    let referenced: bool = tenant
        .read()
        .interact(move |conn| {
            conn.query_row(
                "SELECT EXISTS(
                     SELECT 1
                       FROM triples
                      WHERE (subject_id = ?1 OR object_id = ?1)
                        AND status = 'active')",
                rusqlite::params![&entity_q],
                |r| r.get(0),
            )
        })
        .await
        .map_err(ApiError::from)?;
    if !referenced {
        return Err(ApiError::not_found(format!(
            "node_id {node_id_full:?} (entity {entity_value:?}) not found in current tenant -- entities must be referenced by at least one triple to be neighborable"
        )));
    }
    Ok(())
}

/// Explicit-neighbor path: gathers the structural edges incident to the
/// focal node by calling the existing `expand_*` functions and
/// concatenating what they return.
///
/// Which expansions run depends on the focal node's kind:
///
///   * episode  -- `cluster_member`, then `triple`
///   * document -- `document_chunk`
///   * chunk    -- `document_chunk`
///   * cluster  -- `cluster_member`
///   * entity   -- `triple`
///
/// `limit` is handed unchanged to every expansion, so it bounds each edge
/// kind separately rather than the combined result. Nothing is
/// deduplicated here. The first expansion error aborts the whole call.
async fn neighbors_explicit(
    tenant: &TenantHandle,
    tenant_id: &str,
    node_kind: NodeKind,
    value: &str,
    node_id_full: &str,
    limit: i64,
) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
    // Exhaustive on purpose: a new node kind must decide explicitly which
    // edge kinds apply to it.
    let (with_cluster_member, with_triple, with_document_chunk) = match node_kind {
        NodeKind::Episode => (true, true, false),
        NodeKind::Document | NodeKind::Chunk => (false, false, true),
        NodeKind::Cluster => (true, false, false),
        NodeKind::Entity => (false, true, false),
    };

    let mut nodes: Vec<GraphNode> = Vec::new();
    let mut edges: Vec<GraphEdge> = Vec::new();

    // Run order matters only for episodes, where cluster_member results
    // come before triple results in the output.
    if with_cluster_member {
        let memberships =
            expand_cluster_member(tenant, tenant_id, node_kind, value, node_id_full, limit)
                .await?;
        nodes.extend(memberships.nodes);
        edges.extend(memberships.edges);
    }
    if with_triple {
        let triples =
            expand_triple(tenant, tenant_id, node_kind, value, node_id_full, limit).await?;
        nodes.extend(triples.nodes);
        edges.extend(triples.edges);
    }
    if with_document_chunk {
        let chunk_links =
            expand_document_chunk(tenant, tenant_id, node_kind, value, node_id_full, limit)
                .await?;
        nodes.extend(chunk_links.nodes);
        edges.extend(chunk_links.edges);
    }

    Ok((nodes, edges))
}

/// Semantic-neighbor path: dispatches to the per-kind similarity lookup.
///
///   * Episodes -> `neighbors_semantic_from_episode`
///   * Chunks   -> `neighbors_semantic_from_chunk`
///
/// Every other focal kind has no semantic path and is rejected with
/// `ApiError::bad_request`. `limit` and `threshold` are forwarded
/// unchanged to the per-kind lookup.
async fn neighbors_semantic(
    tenant: &TenantHandle,
    tenant_id: &str,
    node_kind: NodeKind,
    value: &str,
    node_id_full: &str,
    limit: u32,
    threshold: f32,
) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
    match node_kind {
        NodeKind::Episode => {
            neighbors_semantic_from_episode(
                tenant,
                tenant_id,
                value,
                node_id_full,
                limit,
                threshold,
            )
            .await
        }
        NodeKind::Chunk => {
            neighbors_semantic_from_chunk(
                tenant,
                tenant_id,
                value,
                node_id_full,
                limit,
                threshold,
            )
            .await
        }
        // Listed explicitly (no `_` arm) so that adding a node kind forces
        // a decision about whether it gets a semantic path.
        NodeKind::Document | NodeKind::Cluster | NodeKind::Entity => {
            Err(ApiError::bad_request(format!(
                "semantic neighbors only valid for episode or chunk source; got {}",
                node_kind.as_wire_str()
            )))
        }
    }
}

async fn neighbors_semantic_from_episode(
    tenant: &TenantHandle,
    tenant_id: &str,
    memory_id: &str,
    node_id_full: &str,
    limit: u32,
    threshold: f32,
) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
    let memory_id_q = memory_id.to_string();
    let memory_id_for_self_excl = memory_id.to_string();
    let content: Option<String> = tenant
        .read()
        .interact(move |conn| {
            conn.query_row(
                "SELECT content FROM episodes WHERE memory_id = ?1 AND status = 'active'",
                rusqlite::params![&memory_id_q],
                |r| r.get::<_, String>(0),
            )
            .map(Some)
            .or_else(|e| match e {
                rusqlite::Error::QueryReturnedNoRows => Ok(None),
                other => Err(other),
            })
        })
        .await
        .map_err(ApiError::from)?;

    // Existence is guaranteed by the focal-exists probe earlier; an
    // empty content here would be a status-transition race we treat as
    // "nothing to compare against".
    let Some(content) = content else {
        return Ok((Vec::new(), Vec::new()));
    };

    // Widen the request by 1 so dropping self doesn't shrink the page.
    let widened = (limit as usize).saturating_add(1).min(100);
    let result = solo_query::recall::run_recall_inner(
        tenant.embedder(),
        tenant.hnsw(),
        tenant.read(),
        &content,
        widened,
    )
    .await
    .map_err(ApiError::from)?;

    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    for hit in result.hits.into_iter() {
        if hit.memory_id == memory_id_for_self_excl {
            // Skip self.
            continue;
        }
        if nodes.len() as u32 >= limit {
            break;
        }
        let weight = (1.0 - hit.cos_distance).max(0.0);
        if weight < threshold {
            continue;
        }
        let target_id = format!("ep:{}", hit.memory_id);
        edges.push(GraphEdge {
            id: edge_id(node_id_full, "semantic", &target_id),
            source: node_id_full.to_string(),
            target: target_id,
            kind: "semantic",
            predicate: None,
            weight: Some(weight),
        });
        nodes.push(GraphNode {
            id: format!("ep:{}", hit.memory_id),
            kind: NodeKind::Episode.as_wire_str(),
            label: episode_label(&hit.content),
            ts_ms: None,
            tenant_id: tenant_id.to_string(),
            preview: Some(truncate_preview(&hit.content, GRAPH_PREVIEW_CHARS)),
        });
    }
    Ok((nodes, edges))
}

async fn neighbors_semantic_from_chunk(
    tenant: &TenantHandle,
    tenant_id: &str,
    chunk_id: &str,
    node_id_full: &str,
    limit: u32,
    threshold: f32,
) -> Result<(Vec<GraphNode>, Vec<GraphEdge>), ApiError> {
    let chunk_id_q = chunk_id.to_string();
    let chunk_id_for_self_excl = chunk_id.to_string();
    let content: Option<String> = tenant
        .read()
        .interact(move |conn| {
            conn.query_row(
                "SELECT c.content
                   FROM document_chunks c
                   JOIN documents d ON d.doc_id = c.doc_id
                  WHERE c.chunk_id = ?1
                    AND d.status = 'active'",
                rusqlite::params![&chunk_id_q],
                |r| r.get::<_, String>(0),
            )
            .map(Some)
            .or_else(|e| match e {
                rusqlite::Error::QueryReturnedNoRows => Ok(None),
                other => Err(other),
            })
        })
        .await
        .map_err(ApiError::from)?;

    let Some(content) = content else {
        return Ok((Vec::new(), Vec::new()));
    };

    let widened = (limit as usize).saturating_add(1).min(100);
    let hits = solo_query::doc_search::run_doc_search_inner(
        tenant.embedder(),
        tenant.hnsw(),
        tenant.read(),
        &content,
        widened,
    )
    .await
    .map_err(ApiError::from)?;

    let mut nodes = Vec::new();
    let mut edges = Vec::new();
    for hit in hits.into_iter() {
        if hit.chunk_id == chunk_id_for_self_excl {
            continue;
        }
        if nodes.len() as u32 >= limit {
            break;
        }
        let weight = (1.0 - hit.cos_distance).max(0.0);
        if weight < threshold {
            continue;
        }
        let target_id = format!("chunk:{}", hit.chunk_id);
        edges.push(GraphEdge {
            id: edge_id(node_id_full, "semantic", &target_id),
            source: node_id_full.to_string(),
            target: target_id,
            kind: "semantic",
            predicate: None,
            weight: Some(weight),
        });
        let exp = ExpandedChunk {
            chunk_id: hit.chunk_id.clone(),
            chunk_index: hit.chunk_index as i64,
            content: hit.content.clone(),
        };
        nodes.push(graph_node_for_chunk(tenant_id, &exp));
    }
    Ok((nodes, edges))
}

// ---------------------------------------------------------------------------
// /v1/graph/stream — SSE invalidation feed (v0.10.0)
//
// Powers solo-web's live-update behaviour: instead of polling, the
// frontend subscribes once and refetches its pages only when the
// writer-actor signals "your tenant's data changed". Per scoping doc
// §3 Decision C, the wire format is invalidation-shaped (not row
// payload) — the SSE channel says "refetch the affected page" rather
// than streaming actual rows.
//
// Wire format:
//
//   ```
//   event: init
//   data: {"connected": true, "tenant_id": "default", "ts_ms": 1715625600000}
//
//   event: invalidate
//   data: {"reason": "memory.remember", "tenant_id": "default",
//          "ts_ms": 1715625610000, "kind": "episode"}
//
//   event: heartbeat
//   data: {"ts_ms": 1715625640000}
//   ```
//
// Heartbeat: every [`STREAM_HEARTBEAT_SECS`] seconds, regardless of
// whether real events fired (simpler than resetting the timer on every
// invalidate; the cost is a few extra bytes per minute on idle).
//
// Lagged subscribers (a subscriber that fell more than 256 writes behind)
// produce one server-side `tracing::warn!` per lag report and nothing on
// the wire; the client resyncs via the next real `invalidate`.
// Invalidation events are idempotent, so the missed batch reduces to a
// single refetch on the client side. No correctness loss.
//
// See `docs/dev-log/0117-graph-stream-impl.md` for the full design.
// ---------------------------------------------------------------------------

/// Heartbeat period, in seconds, for `/v1/graph/stream`.
///
/// The heartbeat runs on a fixed schedule rather than "30s after the
/// last event": that is easier to reason about, keeps proxies happy, and
/// avoids code that would have to race a timer reset against every
/// invalidate.
pub const STREAM_HEARTBEAT_SECS: u64 = 30;

/// SSE `event:` name of the first item on `/v1/graph/stream`. Emitted
/// exactly once per connection; the client uses it to confirm the
/// subscription is live.
const STREAM_EVENT_INIT: &str = "init";

/// SSE `event:` name used for every `InvalidateEvent` received from the
/// tenant's broadcast channel (sent on every writer-actor commit and on
/// `gdpr.forget_user`'s non-writer-actor cascade).
const STREAM_EVENT_INVALIDATE: &str = "invalidate";

/// SSE `event:` name used for the periodic heartbeat items.
const STREAM_EVENT_HEARTBEAT: &str = "heartbeat";

/// `GET /v1/graph/stream` — Server-Sent Events feed of
/// `InvalidateEvent`s for the tenant resolved by [`TenantExtractor`].
///
/// The handler subscribes to the tenant's invalidation broadcast channel
/// and returns the stream produced by [`build_invalidate_stream`]:
///
///   1. one `init` event when the stream is first polled;
///   2. then `invalidate` / `heartbeat` events as the broadcast receiver
///      or the heartbeat timer fire;
///   3. the stream ends when the broadcast sender is dropped. A client
///      disconnect ends it from the outside, presumably by axum dropping
///      the response body.
///
/// There is no per-route auth logic here; tenant resolution is done by
/// the extractor, mirroring the rest of `/v1/graph/*`.
async fn graph_stream_handler(
    TenantExtractor(tenant): TenantExtractor,
) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
    // Subscribe first, before anything else is built: a commit that lands
    // between now and the first poll is then buffered by the receiver
    // instead of being missed.
    let rx = tenant.invalidate_sender().subscribe();
    let events = build_invalidate_stream(
        rx,
        tenant.tenant_id().to_string(),
        STREAM_HEARTBEAT_SECS,
    );

    // Liveness is signalled by our own typed `heartbeat` events, which
    // solo-web can surface in its UI without parsing comment lines. The
    // built-in keep-alive is therefore stretched to one hour instead of
    // its default interval.
    // NOTE(review): presumably the typed heartbeats keep axum's
    // keep-alive timer from ever elapsing — confirm against `KeepAlive`.
    let keep_alive = KeepAlive::new().interval(Duration::from_secs(3600));
    Sse::new(events).keep_alive(keep_alive)
}

/// Per-subscriber state threaded through `futures::stream::unfold` by
/// [`build_invalidate_stream`]. One value exists per open SSE
/// connection and is moved from one poll of the stream to the next.
struct StreamState {
    /// Receiving half of the tenant's invalidation broadcast channel.
    rx: broadcast::Receiver<InvalidateEvent>,
    /// Timer that drives the periodic `heartbeat` events.
    heartbeat: tokio::time::Interval,
    /// Tenant id echoed back in the `init` event payload.
    tenant_id: String,
    /// `true` until the `init` event has been produced; the stream
    /// flips it to `false` while building that event, so `init` is
    /// emitted exactly once.
    needs_init: bool,
}

/// Build the stream of SSE [`Event`]s for one subscriber.
///
/// First yield is the `init` event. After that, the stream selects
/// between the broadcast receiver and a tokio interval timer that
/// fires every `heartbeat_secs` seconds. Lagged broadcast errors are
/// swallowed with a single `tracing::warn!` line — the client resyncs
/// on the next real invalidate (invalidation events are idempotent).
fn build_invalidate_stream(
    rx: broadcast::Receiver<InvalidateEvent>,
    tenant_id: String,
    heartbeat_secs: u64,
) -> impl Stream<Item = Result<Event, Infallible>> {
    // `tokio::time::interval_at(start, period)` starts ticking at
    // `start`; we set `start = now + period` so the first heartbeat
    // lands `heartbeat_secs` AFTER the init event. Without `interval_at`
    // the default `interval()` would fire immediately at t=0, racing
    // the init event.
    let start_at = tokio::time::Instant::now() + Duration::from_secs(heartbeat_secs);
    let heartbeat =
        tokio::time::interval_at(start_at, Duration::from_secs(heartbeat_secs));

    let state = StreamState {
        rx,
        heartbeat,
        tenant_id,
        needs_init: true,
    };
    futures::stream::unfold(state, move |mut state| async move {
        // First-poll: yield the init event without touching the
        // receiver or the heartbeat. Subsequent polls fall through to
        // the select loop.
        if state.needs_init {
            state.needs_init = false;
            let init_payload = serde_json::json!({
                "connected": true,
                "tenant_id": state.tenant_id,
                "ts_ms": chrono::Utc::now().timestamp_millis(),
            });
            let ev = Event::default()
                .event(STREAM_EVENT_INIT)
                .json_data(init_payload)
                .unwrap_or_else(|_| Event::default().event(STREAM_EVENT_INIT));
            return Some((Ok::<Event, Infallible>(ev), state));
        }
        loop {
            tokio::select! {
                event = state.rx.recv() => {
                    match event {
                        Ok(ev) => {
                            let sse_event = Event::default()
                                .event(STREAM_EVENT_INVALIDATE)
                                .json_data(&ev)
                                .unwrap_or_else(|_| Event::default()
                                    .event(STREAM_EVENT_INVALIDATE));
                            return Some((Ok::<Event, Infallible>(sse_event), state));
                        }
                        Err(broadcast::error::RecvError::Lagged(n)) => {
                            tracing::warn!(
                                lagged = n,
                                "graph stream subscriber lagged; client will \
                                 resync on the next real invalidate"
                            );
                            // Continue receiving — do NOT yield anything
                            // for a lag.
                        }
                        Err(broadcast::error::RecvError::Closed) => {
                            tracing::debug!(
                                "graph stream broadcast closed; ending SSE stream"
                            );
                            return None;
                        }
                    }
                }
                _ = state.heartbeat.tick() => {
                    let hb_payload = serde_json::json!({
                        "ts_ms": chrono::Utc::now().timestamp_millis(),
                    });
                    let sse_event = Event::default()
                        .event(STREAM_EVENT_HEARTBEAT)
                        .json_data(hb_payload)
                        .unwrap_or_else(|_| Event::default()
                            .event(STREAM_EVENT_HEARTBEAT));
                    return Some((Ok::<Event, Infallible>(sse_event), state));
                }
            }
        }
    })
}

// ---------------------------------------------------------------------------
// /v1/tenants — principal-scoped tenant list (v0.10.0 + v0.10.1 hydration)
//
// Powers solo-web's top-bar tenant picker (Decision F in
// `docs/dev-log/0105-solo-web-scoping.md` §3, route shape locked in §4
// Route 6). The endpoint is **read-only**; admin CRUD (create / delete /
// rename / quota change) remains CLI-only per ADR-0004 §"Admin operations".
// That keeps the privileged tenant-mutation surface off HTTP entirely
// while still letting an authenticated browser session enumerate the
// tenants it's allowed to see.
//
// Wire shape (200 OK):
//
//   ```json
//   {
//     "tenants": [
//       {
//         "id": "default",
//         "display_name": "Default tenant",
//         "created_at_ms": 1715625600000,
//         "last_accessed_ms": 1715625900000,
//         "status": "active",
//         "quota_bytes": null,
//         "episode_count": null,
//         "size_bytes": null,
//         "pct_used": null
//       }
//     ]
//   }
//   ```
//
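// Note: the optional persisted fields (`display_name`,
// `last_accessed_ms`, `quota_bytes`) are serialised with
// `skip_serializing_if = "Option::is_none"`, so when unset they are
// omitted from the object rather than emitted as `null`; the
// `"quota_bytes": null` in the example above is illustrative only.
//
// The numeric `episode_count` / `size_bytes` / `pct_used` fields were
// **always `null` in v0.10.0** (cost-deferred). v0.10.1 hydrates them
// for real via `TenantRegistry::hydrate_tenant_cost_numbers`: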
//
//   * `size_bytes` — `std::fs::metadata(<data_dir>/tenants/<db>.db).len()`.
//     Cheap; runs for every visible tenant.
//   * `episode_count` — `SELECT COUNT(*) FROM episodes WHERE
//     status='active'` against the per-tenant SQLCipher DB.
//   * `pct_used` — `size_bytes * 100 / quota_bytes` (f64, capped at
//     100.0) when both are known; `null` if `quota_bytes` is unset.
//
// **Cap**: opening + counting N tenant DBs is N×~10ms; the first-paint
// budget is tight, so we cap `episode_count` hydration at
// `TENANTS_COUNT_HYDRATION_CAP` (50) per request. Tenants beyond the
// cap get `episode_count: null` and the response carries an
// `X-Solo-Tenants-Count-Cap-Reached: true` header so clients can fetch
// counts for the tail tenants out-of-band if needed (mirroring the
// entity-cap pattern from `/v1/graph/nodes`). `size_bytes` is not
// capped — it's just a `metadata` call.
//
// The CLI's `solo tenants list` retains the canonical per-tenant
// cost-numbers path for operators who need exhaustive data.
//
// ## Visibility filter (load-bearing — three cases)
//
// The handler reads `AuthenticatedPrincipal` out of request extensions
// via `MaybePrincipal` and filters the registry list before
// serialisation:
//
//   1. **No principal** (`MaybePrincipal(None)`) — unauthenticated
//      loopback path, no `[auth]` block in `solo.config.toml`. Return
//      every `Active` tenant. Same scope as `solo tenants list` CLI.
//   2. **Bearer principal** (`subject == "bearer" && claims.is_null()
//      && scopes.is_empty()`, the `AuthenticatedPrincipal::bearer`
//      signature emitted by `BearerValidator::validate`).
//      Single-principal daemon — the bearer holder is the operator, so
//      return every `Active` tenant. Functionally equivalent to (1)
//      from a leakage standpoint.
//   3. **OIDC principal** (any other principal — `claims` carries the
//      JWT object). Filter to ONLY the tenant id matching
//      `principal.tenant_claim`. The configured OIDC tenant_claim is
//      already validated to a real `TenantId` by the auth middleware
//      (a `MissingTenantClaim` or `InvalidTenantClaim` shorts out at
//      403 BEFORE this handler runs). If the claim doesn't match any
//      registered tenant, return `{"tenants": []}` (200 OK, NOT 404)
//      — don't leak whether a tenant exists by 404'ing on names
//      outside the principal's scope.
//
// `PendingMigration` / `PendingDelete` tenants are **excluded** from the
// list in every case. solo-web's tenant picker should not surface a
// tenant that's mid-migration or queued for hard-delete — clicking
// such a row would race the admin tooling. The CLI's `solo tenants
// list` still shows them under an explicit `--include-pending` flag
// (out of scope here).
//
// See `docs/dev-log/0119-tenants-list-impl.md` for the full design.
// ---------------------------------------------------------------------------

/// One row of the `/v1/tenants` response body. Carries the persisted
/// fields of `solo_storage::TenantRecord` plus the cost-numbers triple
/// (`episode_count`, `size_bytes`, `pct_used`). The triple was always
/// `null` in v0.10.0; since v0.10.1 the list handler fills it from the
/// registry's cost hydration.
///
/// Serialisation note: the three fields tagged `skip_serializing_if`
/// are left out of the JSON object when `None`, whereas the cost
/// fields are always present and serialise as `null` when `None`.
#[derive(Debug, Clone, Serialize)]
struct TenantListItem {
    /// Tenant id (e.g. `"default"`, `"alice"`). Matches the
    /// `X-Solo-Tenant` header value clients send to other routes.
    id: String,
    /// Human-readable display name set at `solo tenants create`.
    /// `None` ⇒ omitted from the JSON body.
    #[serde(skip_serializing_if = "Option::is_none")]
    display_name: Option<String>,
    /// Epoch ms when this tenant was registered.
    created_at_ms: i64,
    /// Epoch ms of the most recent `TenantRegistry::get_or_open` call
    /// (v0.9.0 P1). `None` for tenants that have never been opened
    /// since the migration ran; omitted from the JSON body in that case.
    #[serde(skip_serializing_if = "Option::is_none")]
    last_accessed_ms: Option<i64>,
    /// Lifecycle status. Always `"active"` on the wire today (the list
    /// handler filters `PendingMigration` / `PendingDelete` out).
    /// Surfaced for forward-compat — a future `?include_pending=1`
    /// query param could relax the filter without a shape change.
    status: TenantStatusJson,
    /// Per-tenant byte quota set via `solo tenants set-quota`. `None`
    /// ⇒ unlimited; omitted from the JSON body in that case.
    #[serde(skip_serializing_if = "Option::is_none")]
    quota_bytes: Option<u64>,
    /// v0.10.1: count of `episodes WHERE status='active'`. Populated
    /// for the first `TENANTS_COUNT_HYDRATION_CAP` tenants in the
    /// response; `null` for tenants beyond the cap (in which case the
    /// response also carries `X-Solo-Tenants-Count-Cap-Reached: true`).
    /// Also `null` if the per-tenant DB file is missing or the COUNT
    /// failed.
    episode_count: Option<i64>,
    /// v0.10.1: size of the per-tenant SQLCipher DB on disk (bytes).
    /// `null` only if the file is missing or unreadable (corruption /
    /// permissions). Not affected by the cap — `std::fs::metadata` is
    /// cheap.
    size_bytes: Option<u64>,
    /// v0.10.1: `(size_bytes * 100.0 / quota_bytes)` capped at `100.0`
    /// when both `size_bytes` and `quota_bytes` are known and the quota
    /// is non-zero. `null` if `quota_bytes` is unset (no quota =
    /// unlimited), is zero, or `size_bytes` is unknown.
    pct_used: Option<f64>,
}

/// JSON-side mirror of [`TenantStatus`], serialised in `snake_case`
/// (so `Active` becomes `"active"`).
///
/// It is deliberately a separate type rather than a re-use of the
/// storage enum, so the HTTP wire shape stays decoupled from storage:
/// a status variant added on the storage side does not automatically
/// leak onto the wire. Only `Active` exists here because the list
/// handler never returns tenants in any other state.
#[derive(Debug, Clone, Copy, Serialize)]
#[serde(rename_all = "snake_case")]
enum TenantStatusJson {
    Active,
}

impl From<&solo_storage::TenantStatus> for TenantStatusJson {
    fn from(s: &solo_storage::TenantStatus) -> Self {
        // We only ever build this enum from `Active` records (the list
        // handler filters at source); the match exhausts so future
        // variants force a compile error here, not a wire mismatch.
        match s {
            solo_storage::TenantStatus::Active => TenantStatusJson::Active,
            // Defensive: should be filtered upstream. Map to Active to
            // avoid a panic, but the handler MUST keep filtering at
            // source. A clippy warning catches dead branches.
            solo_storage::TenantStatus::PendingMigration
            | solo_storage::TenantStatus::PendingDelete => TenantStatusJson::Active,
        }
    }
}

/// Response body for `GET /v1/tenants`: a single `tenants` array with
/// one [`TenantListItem`] per tenant visible to the caller. The array
/// is empty (not an error) when nothing is visible.
#[derive(Debug, Serialize)]
struct TenantsListResponse {
    /// Visible tenants, in the order the handler built them.
    tenants: Vec<TenantListItem>,
}

/// v0.10.1: maximum number of tenants whose `episode_count` is hydrated
/// per `/v1/tenants` request. Opening + counting one tenant DB is
/// ~5-10ms, so the cap bounds the per-request wall time and keeps
/// solo-web's first-paint budget tight. Tenants beyond the cap get
/// `episode_count: null`, and the response carries
/// `X-Solo-Tenants-Count-Cap-Reached: true` so clients can fetch
/// per-tenant counts out-of-band (CLI / future per-id endpoint) for
/// the tail. The 50 figure mirrors the entity-cap pattern from
/// `/v1/graph/nodes`.
const TENANTS_COUNT_HYDRATION_CAP: usize = 50;

/// v0.10.1: name of the response header set to `"true"` when the number
/// of visible tenants exceeds [`TENANTS_COUNT_HYDRATION_CAP`]; absent
/// otherwise. Kept lowercase because it is passed to
/// `axum::http::HeaderName::from_static` (header names are
/// case-insensitive on the wire; the canonical spelling is
/// `X-Solo-Tenants-Count-Cap-Reached`).
const X_SOLO_TENANTS_COUNT_CAP_HEADER: &str = "x-solo-tenants-count-cap-reached";

/// `GET /v1/tenants` — list the tenants visible to the request's
/// principal, with per-tenant cost numbers attached.
///
/// Pipeline: read the registry list → keep `Active` records only →
/// apply the principal visibility rule
/// ([`filter_tenants_for_principal`]) → hydrate cost numbers → build
/// the JSON body. When more tenants are visible than
/// [`TENANTS_COUNT_HYDRATION_CAP`], the response also carries the
/// [`X_SOLO_TENANTS_COUNT_CAP_HEADER`] header set to `"true"`.
///
/// Errors:
///   * **401** — bearer required but missing/invalid (handled by
///     `auth_middleware` before this handler runs).
///   * **500** — the registry list read failed. Surfaced via [`ApiError`].
///
/// There is no 404 path: a principal whose tenant claim matches nothing
/// gets `200 OK` with `tenants: []`, so tenant existence cannot be
/// probed by id.
async fn tenants_list_handler(
    State(state): State<SoloHttpState>,
    MaybePrincipal(maybe_principal): MaybePrincipal,
) -> Result<Response, ApiError> {
    // Registry order is stable between requests (solo-web relies on it
    // so picker entries don't reorder). Anything not `Active` is an
    // admin-transient state the picker must not surface, so it is
    // dropped right at the source.
    let active: Vec<solo_storage::TenantRecord> = state
        .registry
        .list_active()
        .await
        .map_err(ApiError::from)?
        .into_iter()
        .filter(|record| matches!(record.status, solo_storage::TenantStatus::Active))
        .collect();

    // Principal-driven visibility; the rule lives in its own pure
    // function so it can be unit-tested without the SQL read.
    let visible = filter_tenants_for_principal(active, maybe_principal.as_ref());
    let cap_reached = visible.len() > TENANTS_COUNT_HYDRATION_CAP;

    // v0.10.1: cost numbers. The registry helper is handed the cap;
    // tenants past it are expected to come back with no episode count.
    let costs = state
        .registry
        .hydrate_tenant_cost_numbers(&visible, TENANTS_COUNT_HYDRATION_CAP)
        .await;

    let mut tenants: Vec<TenantListItem> = Vec::with_capacity(visible.len());
    for (record, cost) in visible.iter().zip(costs.iter()) {
        // Percentage only makes sense with a known size and a non-zero
        // quota; it is capped at 100.
        let pct_used = cost
            .size_bytes
            .zip(record.quota_bytes)
            .filter(|&(_, quota)| quota > 0)
            .map(|(size, quota)| ((size as f64) * 100.0 / (quota as f64)).min(100.0));

        tenants.push(TenantListItem {
            id: record.tenant_id.to_string(),
            display_name: record.display_name.clone(),
            created_at_ms: record.created_at_ms,
            last_accessed_ms: record.last_accessed_ms,
            status: TenantStatusJson::from(&record.status),
            quota_bytes: record.quota_bytes,
            episode_count: cost.episode_count,
            size_bytes: cost.size_bytes,
            pct_used,
        });
    }

    let mut response = Json(TenantsListResponse { tenants }).into_response();
    if cap_reached {
        response.headers_mut().insert(
            axum::http::HeaderName::from_static(X_SOLO_TENANTS_COUNT_CAP_HEADER),
            axum::http::HeaderValue::from_static("true"),
        );
    }
    Ok(response)
}

/// Pure visibility rule for `/v1/tenants`, kept separate from the
/// handler so it can be unit-tested without an axum router.
///
///   * No principal ⇒ every record is returned (no-auth path, same
///     scope as `solo tenants list`).
///   * Bearer-shaped principal (see [`is_single_principal_bearer`]) ⇒
///     every record is returned (single-principal daemon).
///   * Any other principal (OIDC) ⇒ only records whose `tenant_id`
///     equals the principal's `tenant_claim`. With no `tenant_claim` at
///     all the result is empty — the middleware is expected to have
///     rejected such a request already, so this is purely defensive.
///
/// An unmatched claim yields an empty list rather than an error, so the
/// caller cannot learn whether some other tenant exists. The relative
/// order of the surviving records is preserved.
fn filter_tenants_for_principal(
    records: Vec<solo_storage::TenantRecord>,
    principal: Option<&AuthenticatedPrincipal>,
) -> Vec<solo_storage::TenantRecord> {
    match principal {
        // Case 1: no auth configured.
        None => records,
        // Case 2: the single bearer holder is effectively the operator.
        Some(p) if is_single_principal_bearer(p) => records,
        // Case 3: OIDC — restrict to the claimed tenant.
        Some(p) => match p.tenant_claim.as_ref() {
            Some(claim) => {
                let mut visible = records;
                visible.retain(|r| r.tenant_id == *claim);
                visible
            }
            None => Vec::new(),
        },
    }
}

/// Discriminator for bearer-mode principals — the shape emitted by
/// [`AuthenticatedPrincipal::bearer`]. Returns `true` only when all
/// three hold: `subject` is literally `"bearer"`, `claims` is JSON
/// null, and `scopes` is empty. OIDC principals carry a JWT object in
/// `claims` and the JWT `sub` in `subject`, so they fail this check.
///
/// Kept as its own function so unit tests can assert the predicate
/// directly and so any future widening (e.g. an admin scope) happens
/// in one place.
fn is_single_principal_bearer(principal: &AuthenticatedPrincipal) -> bool {
    if principal.subject != "bearer" {
        return false;
    }
    principal.claims.is_null() && principal.scopes.is_empty()
}

// ---------------------------------------------------------------------------
// v0.10.2 — MCP-over-HTTP transport on /mcp
// ---------------------------------------------------------------------------

/// SSE `event:` name of the first event emitted by `GET /mcp` when a
/// client connects. Browser-based MCP clients (e.g. the AI SDK's
/// `experimental_createMCPClient` with the SSE transport) use it to
/// confirm the stream is live before they start waiting for
/// server-initiated notifications. In v0.10.2 the stream stays idle
/// after this event; server-initiated notifications are planned for
/// v0.10.3+.
pub const MCP_STREAM_EVENT_INIT: &str = "init";

/// `POST /mcp` — JSON-RPC request/response (v0.10.2 P2 entry point).
///
/// The body is parsed as one JSON-RPC 2.0 envelope and handed to an
/// `McpDispatcher` built for the resolved tenant and audit principal.
///
/// Status codes produced here:
///   * **200** — the dispatcher returned a response envelope. JSON-RPC
///     errors travel in the body, so an error envelope is still 200.
///   * **202** — the dispatcher returned nothing (a notification).
///   * **400** — the body is not a parseable envelope, or its `jsonrpc`
///     field is not `"2.0"`. The body is `{ "error": …, "status": 400 }`.
///   * **401** — produced by `auth_middleware` ahead of this handler
///     when auth is configured and the bearer check fails.
///
/// Unlike `solo mcp-stdio`, which binds one tenant at process start via
/// `--tenant`, the tenant is resolved per request by [`TenantExtractor`],
/// so one daemon can serve MCP calls for any tenant the registry knows.
/// The audit principal comes from [`AuditPrincipal`].
async fn mcp_http_post_handler(
    TenantExtractor(tenant): TenantExtractor,
    State(state): State<SoloHttpState>,
    AuditPrincipal(principal): AuditPrincipal,
    body: axum::body::Bytes,
) -> Response {
    // Shared shape of both 400 rejections.
    fn bad_request(detail: String) -> Response {
        (
            StatusCode::BAD_REQUEST,
            Json(serde_json::json!({
                "error": detail,
                "status": 400,
            })),
        )
            .into_response()
    }

    // Malformed wire input is answered with an HTTP 4xx (rather than an
    // in-body JSON-RPC parse error) so operator tooling can tell "the
    // server rejected the request shape" from "the method failed".
    let request = match serde_json::from_slice::<crate::mcp_dispatch::JsonRpcRequest>(&body) {
        Err(e) => return bad_request(format!("invalid JSON-RPC request: {e}")),
        Ok(parsed) => parsed,
    };
    if request.jsonrpc != "2.0" {
        return bad_request(format!(
            "invalid JSON-RPC request: expected jsonrpc=\"2.0\", got {:?}",
            request.jsonrpc
        ));
    }

    // Dispatcher bound to this request's tenant + audit principal.
    let dispatcher = crate::mcp_dispatch::McpDispatcher::new(
        state.registry.clone(),
        tenant,
        (*state.user_aliases).clone(),
        principal,
    );

    let outcome = dispatcher.dispatch(request).await;
    match outcome {
        // Notification: JSON-RPC 2.0 forbids a reply, and the MCP
        // Streamable HTTP transport uses 202 Accepted so the client
        // does not wait on a body.
        None => StatusCode::ACCEPTED.into_response(),
        // Any valid request gets 200; the client tells success from
        // failure by `result` vs `error` in the envelope.
        Some(envelope) => (StatusCode::OK, Json(envelope)).into_response(),
    }
}

/// `GET /mcp` — the server-to-client SSE channel of the MCP Streamable
/// HTTP transport.
///
/// The spec reserves this stream for server-initiated messages
/// (notifications, progress events, sampling requests). In v0.10.2 the
/// implementation is deliberately minimal: a single `event: init` is
/// emitted on connect and the stream then stays idle. Real
/// server-initiated traffic is planned for v0.10.3+ once sessions land.
///
/// The handler sits behind `TenantExtractor`, so every connection is
/// bound to a tenant (browser clients open one stream per tab); future
/// session affinity will tie session ids to the tenant resolved here.
async fn mcp_http_get_handler(
    TenantExtractor(tenant): TenantExtractor,
) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
    // The keep-alive interval is stretched to one hour so that, in
    // practice, the only frame a client sees is the typed `init` event;
    // clients are expected to key off typed events (as they do for
    // `/v1/graph/stream`) rather than keep-alive frames. v0.10.3+ wires
    // real notifications into this stream.
    let idle_keep_alive = KeepAlive::new().interval(Duration::from_secs(3600));
    let init_stream = build_mcp_init_stream(tenant.tenant_id().to_string());
    Sse::new(init_stream).keep_alive(idle_keep_alive)
}

/// Construct the v0.10.2 `/mcp` GET stream.
///
/// The stream yields exactly one `init` event whose JSON payload carries
/// `connected`, the `tenant_id` passed in, and the current wall-clock
/// time in milliseconds (`ts_ms`). After that it never yields again and
/// never terminates, so the SSE connection stays open.
fn build_mcp_init_stream(
    tenant_id: String,
) -> impl Stream<Item = Result<Event, Infallible>> {
    // Unfold state: `Some(tenant_id)` until the init event has been
    // produced, `None` afterwards.
    futures::stream::unfold(Some(tenant_id), move |pending_init| async move {
        match pending_init {
            Some(tenant_id) => {
                let payload = serde_json::json!({
                    "connected": true,
                    "tenant_id": tenant_id,
                    "ts_ms": chrono::Utc::now().timestamp_millis(),
                });
                let init_event = match Event::default()
                    .event(MCP_STREAM_EVENT_INIT)
                    .json_data(payload)
                {
                    Ok(event) => event,
                    // Serialization failed: still announce the connection,
                    // just without a data payload.
                    Err(_) => Event::default().event(MCP_STREAM_EVENT_INIT),
                };
                Some((Ok::<Event, Infallible>(init_event), None))
            }
            None => {
                // Init already sent: park forever. A later version is
                // expected to replace this with a notification-receiver
                // `select!` loop.
                std::future::pending::<()>().await;
                None
            }
        }
    })
}

// ---------------------------------------------------------------------------
// Error mapping
// ---------------------------------------------------------------------------

/// Error value returned by the HTTP handlers.
///
/// Pairs an HTTP status with a human-readable message; the
/// `IntoResponse` impl renders it as a JSON body of the form
/// `{ "error": <message>, "status": <code> }` sent with that status.
#[derive(Debug)]
pub struct ApiError {
    /// HTTP status code used for the response and echoed in the body.
    status: StatusCode,
    /// Message placed in the body's `error` field.
    message: String,
}

impl ApiError {
    /// Build a `400 Bad Request` error carrying `msg`.
    fn bad_request(msg: impl Into<String>) -> Self {
        let message = msg.into();
        Self {
            status: StatusCode::BAD_REQUEST,
            message,
        }
    }

    /// Build a `404 Not Found` error carrying `msg`.
    fn not_found(msg: impl Into<String>) -> Self {
        let message = msg.into();
        Self {
            status: StatusCode::NOT_FOUND,
            message,
        }
    }

    /// Build a `500 Internal Server Error` error carrying `msg`.
    fn internal(msg: impl Into<String>) -> Self {
        let message = msg.into();
        Self {
            status: StatusCode::INTERNAL_SERVER_ERROR,
            message,
        }
    }
}

impl From<solo_core::Error> for ApiError {
    fn from(e: solo_core::Error) -> Self {
        use solo_core::Error;
        match e {
            Error::NotFound(msg) => ApiError::not_found(msg),
            Error::InvalidInput(msg) => ApiError::bad_request(msg),
            Error::Conflict(msg) => Self {
                status: StatusCode::CONFLICT,
                message: msg,
            },
            other => ApiError::internal(other.to_string()),
        }
    }
}

impl IntoResponse for ApiError {
    fn into_response(self) -> Response {
        let body = serde_json::json!({
            "error": self.message,
            "status": self.status.as_u16(),
        });
        (self.status, Json(body)).into_response()
    }
}

// SQL helper for recall used to live here; consolidated into
// solo_query::recall.

#[cfg(test)]
mod handler_tests {
    //! In-process integration tests for the HTTP handler surface. We
    //! drive the axum Router directly via `tower::ServiceExt::oneshot`
    //! — no real TCP listener needed. Same `Harness`-shape as the MCP
    //! tests: real WriterActor + ReaderPool + StubEmbedder + StubVectorIndex.
    //!
    //! Tests live inline in this module rather than in a `tests/` dir
    //! because external integration-test exes triggered Windows UAC
    //! ERROR_ELEVATION_REQUIRED on the dev machine.
    use super::*;
    use axum::body::Body;
    use axum::http::{Request, StatusCode};
    use http_body_util::BodyExt;
    use serde_json::{Value, json};
    use solo_storage::test_support::StubVectorIndex;
    use solo_storage::{
        EmbedderConfig, IdentityConfig, KeyMaterial, ReaderPool, SoloConfig,
        StubEmbedder, TenantHandle, TenantRegistry, WriterActor, WriterSpawn,
    };
    use solo_core::VectorIndex;
    use std::sync::Arc as StdArc;
    use tower::ServiceExt;

    /// Build a `SoloConfig` for tests: a stub f32 embedder of dimension
    /// `dim`, an all-zero salt, no auth, no LLM, and defaults for every
    /// other section.
    fn fake_config(dim: u32) -> SoloConfig {
        let embedder = EmbedderConfig {
            name: String::from("stub"),
            version: String::from("v1"),
            dim,
            dtype: String::from("f32"),
        };
        SoloConfig {
            schema_version: 1,
            salt_hex: String::from("00000000000000000000000000000000"),
            embedder,
            identity: IdentityConfig::default(),
            documents: solo_storage::DocumentConfig::default(),
            auth: None,
            audit: solo_storage::AuditSettings::default(),
            redaction: solo_storage::RedactionConfig::default(),
            llm: None,
            triples: solo_storage::TriplesConfig::default(),
            sampling: solo_storage::SamplingConfig::default(),
        }
    }

    /// In-process fixture for the handler tests: an axum `Router` wired
    /// to a writer actor and reader pool over a temp-dir database, plus
    /// the handles `shutdown` needs to tear everything down.
    struct Harness {
        /// Router under test; tests clone it and drive it via `oneshot`.
        router: axum::Router,
        /// Temp directory that contains the test database file.
        _tmp: tempfile::TempDir,
        /// Path of the database file inside `_tmp` (used by `open_db`).
        db_path: std::path::PathBuf,
        /// The harness's own clone of the writer handle; released in
        /// `shutdown` so the writer thread can exit.
        write_handle_extra: Option<solo_storage::WriteHandle>,
        /// Join handle of the real writer thread, awaited in `shutdown`.
        join: Option<std::thread::JoinHandle<()>>,
        /// v0.10.0: handle to the per-tenant TenantHandle so SSE-flavoured
        /// tests can call `harness.invalidate_sender().send(...)` to
        /// simulate writer-actor invalidations (or grab a Receiver via
        /// `.subscribe()` for subscriber-count assertions).
        tenant_handle: StdArc<TenantHandle>,
        /// v0.10.0: clone of the registry Arc so `/v1/tenants` tests can
        /// seed additional tenant rows into the in-memory tenants_index
        /// stub via `registry.with_index(|idx| idx.register(...))`.
        registry: StdArc<TenantRegistry>,
    }

    impl Harness {
        /// v0.10.0: clone the per-tenant broadcast Sender so tests can
        /// fire `InvalidateEvent`s directly without going through the
        /// writer-actor. The harness's writer is spawned via
        /// `WriterActor::spawn_full` (legacy variant, no invalidate
        /// plumb) so writer-driven events won't reach SSE subscribers
        /// in tests — tests use this Sender to simulate them.
        fn invalidate_sender(&self) -> tokio::sync::broadcast::Sender<InvalidateEvent> {
            self.tenant_handle.invalidate_sender().clone()
        }
    }

    impl Harness {
        /// Build a harness with authentication disabled.
        fn new(runtime: &tokio::runtime::Runtime) -> Self {
            let no_token: Option<String> = None;
            Self::new_with_auth(runtime, no_token)
        }

        /// Open an additional connection to the harness's database file.
        ///
        /// graph_expand tests use this to seed clusters / triples /
        /// documents directly, since the writer-actor does not expose
        /// write paths for them.
        fn open_db(&self) -> rusqlite::Connection {
            let db_path = &self.db_path;
            solo_storage::test_support::open_test_db_at(db_path)
        }

        /// Build a harness that uses static bearer-token auth when
        /// `bearer_token` is `Some`, and no auth when it is `None`.
        fn new_with_auth(
            runtime: &tokio::runtime::Runtime,
            bearer_token: Option<String>,
        ) -> Self {
            let auth = bearer_token.map(|token| crate::auth::AuthConfig::Bearer { token });
            Self::new_with_auth_config(runtime, auth)
        }

        /// Build a complete harness with the given auth configuration
        /// (`None` disables auth).
        ///
        /// Creates a temp directory with a `test.db` inside, registers a
        /// stub embedder identity in it, spawns the writer actor, opens
        /// a reader pool on `runtime`, wraps everything in a
        /// single-tenant registry for the default tenant, and builds the
        /// router from that state. Panics (via `unwrap`) if any setup
        /// step fails.
        fn new_with_auth_config(
            runtime: &tokio::runtime::Runtime,
            auth: Option<crate::auth::AuthConfig>,
        ) -> Self {
            use solo_storage::embedder_registry::{EmbedderIdentity, get_or_insert_embedder_id};

            let tmp = tempfile::TempDir::new().unwrap();
            let dim = 16usize;
            let hnsw: StdArc<dyn VectorIndex + Send + Sync> = StdArc::new(StubVectorIndex::new(dim));
            let embedder: StdArc<dyn solo_core::Embedder> =
                StdArc::new(StubEmbedder::new("stub", "v1", dim));
            let path = tmp.path().join("test.db");

            // Register the stub embedder on a short-lived connection; the
            // connection is dropped at the end of this block, before the
            // writer's connection is opened below.
            let embedder_id = {
                let conn = solo_storage::test_support::open_test_db_at(&path);
                get_or_insert_embedder_id(
                    &conn,
                    &EmbedderIdentity {
                        name: "stub".into(),
                        version: "v1".into(),
                        dim: dim as u32,
                        dtype: "f32".into(),
                    },
                )
                .unwrap()
            };

            // Second connection: this one is handed to the writer actor.
            let conn = solo_storage::test_support::open_test_db_at(&path);
            let WriterSpawn { handle, join } = WriterActor::spawn_full(
                conn,
                hnsw.clone(),
                tmp.path().to_path_buf(),
                embedder_id,
            );
            // The pool is constructed inside `block_on` so it is created
            // within the runtime's context.
            let pool: ReaderPool =
                runtime.block_on(async { ReaderPool::new(&path, None, hnsw.clone()).unwrap() });

            // Build a TenantHandle from the assembled parts and wrap it
            // in a single-tenant test registry.
            let tenant_id = solo_core::TenantId::default_tenant();
            let tenant_handle = StdArc::new(
                TenantHandle::from_parts_for_tests(
                    tenant_id.clone(),
                    fake_config(dim as u32),
                    path.clone(),
                    tmp.path().to_path_buf(),
                    embedder_id,
                    hnsw,
                    embedder.clone(),
                    handle.clone(),
                    // The harness owns ANOTHER WriteHandle clone + the join.
                    // We give the TenantHandle a dummy join that immediately
                    // returns — it never gets joined because shutdown_all
                    // can't get exclusive Arc ownership when the harness
                    // also holds a writer clone.
                    std::thread::spawn(|| {}),
                    pool,
                ),
            );
            // Kept for the `tenant_handle` field; the original Arc moves
            // into the registry below.
            let tenant_handle_clone = tenant_handle.clone();

            // The dummy thread spawned above finishes on its own. The
            // real `join` is deliberately NOT stored in the TenantHandle:
            // the harness keeps it (and its own clone of `handle`) for
            // the shutdown path.
            let key = KeyMaterial::from_bytes_for_tests([0u8; 32]);
            let registry = StdArc::new(TenantRegistry::for_tests_with_single_tenant(
                tmp.path().to_path_buf(),
                key,
                embedder,
                tenant_handle,
            ));
            // Kept for the `registry` field; the original Arc moves into
            // the router state.
            let registry_clone = registry.clone();

            let state = SoloHttpState {
                registry,
                default_tenant: tenant_id,
                user_aliases: Arc::new(Vec::new()),
            };
            let router = router_with_auth_config(state, auth);
            Harness {
                router,
                _tmp: tmp,
                db_path: path,
                write_handle_extra: Some(handle),
                join: Some(join),
                tenant_handle: tenant_handle_clone,
                registry: registry_clone,
            }
        }

        /// Tear the harness down and wait for the writer thread to exit.
        ///
        /// Releases every handle the harness holds, in an order that lets
        /// the writer thread observe its channel closing, waits for that
        /// thread, and only then removes the temp directory.
        ///
        /// # Panics
        ///
        /// Panics if the writer thread does not exit within 5 seconds or
        /// if it panicked.
        fn shutdown(mut self, runtime: &tokio::runtime::Runtime) {
            let join = self.join.take();
            let extra = self.write_handle_extra.take();
            // v0.10.0: the `tenant_handle` field is another
            // `Arc<TenantHandle>` that owns its own WriteHandle clone.
            // It must be dropped here so the inner WriteHandle can be
            // released when the registry drops below. Without this, the
            // writer thread's mpsc never closes and the join times out
            // at 5s.
            let tenant_handle = self.tenant_handle;
            // v0.10.0: same story for the `registry` Arc clone the
            // tenants-list tests use to seed extra index rows — the
            // state inside the router holds one Arc, this is the other;
            // both must drop before the underlying registry dies and
            // releases its index-mutex / cached handles.
            let registry = self.registry;
            runtime.block_on(async move {
                drop(extra);
                drop(tenant_handle); // drop Harness's direct tenant Arc
                drop(registry); // drop Harness's direct registry Arc
                drop(self.router); // drops state → drops pool inside runtime ctx
                if let Some(join) = join {
                    // `JoinHandle::join` has no timeout, so join on a
                    // helper thread and wait for its result with one.
                    let (tx, rx) = std::sync::mpsc::channel();
                    std::thread::spawn(move || {
                        let _ = tx.send(join.join());
                    });
                    tokio::task::spawn_blocking(move || {
                        rx.recv_timeout(std::time::Duration::from_secs(5))
                    })
                    .await
                    .expect("blocking task")
                    .expect("writer thread did not exit within 5s")
                    .expect("writer thread panicked");
                }
                // Remove the temp directory only after the writer thread
                // has been joined. Dropping it earlier raced with a
                // thread that may still have the database open inside
                // that directory; where open files cannot be deleted
                // (e.g. Windows) the removal could fail, and `TempDir`'s
                // drop does not report such failures.
                drop(self._tmp);
            });
        }
    }

    /// Build the multi-threaded Tokio runtime (two workers, all drivers
    /// enabled) that each test runs on.
    fn rt() -> tokio::runtime::Runtime {
        let mut builder = tokio::runtime::Builder::new_multi_thread();
        builder.worker_threads(2).enable_all();
        builder.build().unwrap()
    }

    /// Issue one unauthenticated HTTP request through the router and
    /// capture status + JSON body. `body` may be `None` for GET/DELETE.
    /// Delegates to `call_with_auth` with no `Authorization` header.
    async fn call(
        router: axum::Router,
        method: &str,
        uri: &str,
        body: Option<Value>,
    ) -> (StatusCode, Value) {
        call_with_auth(router, method, uri, body, None).await
    }

    /// Issue one HTTP request through the router and return its status
    /// together with the response body parsed as JSON.
    ///
    /// * `body` — serialized as the JSON request body when `Some`; when
    ///   `None` an empty body with `content-length: 0` is sent.
    /// * `auth` — when `Some`, used verbatim as the `Authorization`
    ///   header value (e.g. `"Bearer xyz"`).
    ///
    /// A response body that is empty or not valid JSON is reported as
    /// `Value::Null`.
    async fn call_with_auth(
        router: axum::Router,
        method: &str,
        uri: &str,
        body: Option<Value>,
        auth: Option<&str>,
    ) -> (StatusCode, Value) {
        let mut builder = Request::builder()
            .method(method)
            .uri(uri)
            .header("content-type", "application/json");
        if let Some(credentials) = auth {
            builder = builder.header("authorization", credentials);
        }
        let request = match body {
            Some(json_body) => {
                let encoded = serde_json::to_vec(&json_body).unwrap();
                builder.body(Body::from(encoded))
            }
            None => builder.header("content-length", "0").body(Body::empty()),
        }
        .unwrap();

        let response = router.oneshot(request).await.expect("oneshot");
        let status = response.status();
        let raw = response.into_body().collect().await.unwrap().to_bytes();
        // An empty body is a parse error for serde_json, so it falls
        // through to `Value::Null` just like any other non-JSON body.
        let parsed: Value = serde_json::from_slice(&raw).unwrap_or(Value::Null);
        (status, parsed)
    }

    /// `GET /health` answers 200 on a harness without auth.
    #[test]
    fn health_returns_ok() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let request = call(h.router.clone(), "GET", "/health", None);
        let (status, _) = runtime.block_on(request);
        assert_eq!(status, StatusCode::OK);
        h.shutdown(&runtime);
    }

    /// `GET /openapi.json` returns a parseable OpenAPI 3.x document that
    /// lists every path, method, and component schema enumerated in
    /// this test, plus the `bearerAuth` security scheme.
    ///
    /// Acts as a drift detector: if one of the listed paths, methods,
    /// or schemas disappears from the spec, this test fails loudly.
    /// New routes are only covered once they are added to the lists
    /// below.
    #[test]
    fn openapi_json_describes_all_endpoints() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let r = h.router.clone();
        let (status, spec) = runtime.block_on(call(r, "GET", "/openapi.json", None));
        assert_eq!(status, StatusCode::OK);
        assert!(spec.is_object(), "openapi.json must be a JSON object");

        // Top-level shape per OpenAPI 3.1.
        assert!(
            spec.get("openapi")
                .and_then(|v| v.as_str())
                .is_some_and(|s| s.starts_with("3.")),
            "missing or wrong openapi version: {spec}"
        );
        assert!(spec.pointer("/info/title").is_some());
        assert!(spec.pointer("/info/version").is_some());

        // Every route the router serves must be documented.
        let paths = spec
            .get("paths")
            .and_then(|v| v.as_object())
            .expect("paths must be an object");
        for expected in [
            "/health",
            "/openapi.json",
            "/memory",
            "/memory/search",
            "/memory/consolidate",
            "/memory/{id}",
            // Path 1 derived-layer endpoints (v0.4.0+):
            "/memory/themes",
            "/memory/facts_about",
            "/memory/contradictions",
            // v0.5.0 Priority 3:
            "/memory/clusters/{cluster_id}",
            // v0.7.0 P6 — document operations:
            "/memory/documents",
            "/memory/documents/search",
            "/memory/documents/{id}",
        ] {
            assert!(
                paths.contains_key(expected),
                "openapi paths missing {expected}: {paths:?}"
            );
        }

        // Method coverage on /memory/documents: must document both POST
        // (ingest) and GET (list).
        let docs = paths.get("/memory/documents").expect("/memory/documents");
        assert!(docs.get("post").is_some(), "POST /memory/documents undocumented");
        assert!(docs.get("get").is_some(), "GET /memory/documents undocumented");

        // Method coverage on /memory/documents/{id}: must document both
        // GET (inspect) and DELETE (forget).
        let docid = paths
            .get("/memory/documents/{id}")
            .expect("/memory/documents/{id}");
        assert!(
            docid.get("get").is_some(),
            "GET /memory/documents/{{id}} undocumented"
        );
        assert!(
            docid.get("delete").is_some(),
            "DELETE /memory/documents/{{id}} undocumented"
        );

        // Method coverage on /memory/{id}: must document both GET (inspect)
        // and DELETE (forget).
        let memid = paths.get("/memory/{id}").expect("memory/{id}");
        assert!(memid.get("get").is_some(), "GET /memory/{{id}} undocumented");
        assert!(
            memid.get("delete").is_some(),
            "DELETE /memory/{{id}} undocumented"
        );

        // Component schemas referenced from paths must be defined.
        for schema_name in [
            "RememberRequest",
            "RememberResponse",
            "RecallRequest",
            "RecallResult",
            "EpisodeRecord",
            "ApiError",
            "ConsolidationScope",
            "ConsolidationReport",
            // Path 1 derived-layer schemas (v0.4.0+):
            "ThemeHit",
            "FactHit",
            "ContradictionHit",
            // v0.5.0 Priority 3:
            "ClusterRecord",
            // v0.7.0 P6 — document schemas:
            "IngestDocumentRequest",
            "IngestReport",
            "ForgetDocumentReport",
            "SearchDocsRequest",
            "DocSearchHit",
            "DocumentInspectResult",
            "DocumentSummary",
        ] {
            let ptr = format!("/components/schemas/{schema_name}");
            assert!(
                spec.pointer(&ptr).is_some(),
                "component schema {schema_name} missing"
            );
        }

        // bearerAuth security scheme is declared (LAN deployments need it).
        assert!(
            spec.pointer("/components/securitySchemes/bearerAuth")
                .is_some(),
            "bearerAuth security scheme missing"
        );

        h.shutdown(&runtime);
    }

    /// With bearer auth enabled, `/openapi.json` must still be served
    /// without credentials: the spec describes the API shape, not
    /// secrets, and codegen tooling shouldn't need a token to fetch it.
    #[test]
    fn openapi_json_is_exempt_from_bearer_auth() {
        let runtime = rt();
        let token = String::from("super-secret");
        let h = Harness::new_with_auth(&runtime, Some(token));
        // The request carries no Authorization header and must still
        // come back 200.
        let request = call(h.router.clone(), "GET", "/openapi.json", None);
        let (status, _) = runtime.block_on(request);
        assert_eq!(status, StatusCode::OK);
        h.shutdown(&runtime);
    }

    /// `POST /memory` with non-empty content answers 200 and returns a
    /// 36-character `memory_id`.
    #[test]
    fn remember_returns_memory_id() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let payload = json!({ "content": "http harness test" });
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "POST", "/memory", Some(payload)));
        assert_eq!(status, StatusCode::OK);
        let memory_id = body.get("memory_id").and_then(Value::as_str).unwrap();
        assert_eq!(memory_id.len(), 36, "uuid length");
        h.shutdown(&runtime);
    }

    /// `POST /memory` with empty `content` is rejected with 400 and an
    /// error message containing "must not be empty".
    #[test]
    fn empty_content_returns_400() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let payload = json!({ "content": "" });
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "POST", "/memory", Some(payload)));
        assert_eq!(status, StatusCode::BAD_REQUEST);
        let mentions_empty = body
            .get("error")
            .and_then(Value::as_str)
            .is_some_and(|msg| msg.contains("must not be empty"));
        assert!(mentions_empty, "got: {body}");
        h.shutdown(&runtime);
    }

    /// `POST /memory/search` with an empty `query` is rejected with 400
    /// and an error message containing "must not be empty".
    #[test]
    fn empty_query_returns_400() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let payload = json!({ "query": "" });
        let (status, body) = runtime.block_on(call(
            h.router.clone(),
            "POST",
            "/memory/search",
            Some(payload),
        ));
        assert_eq!(status, StatusCode::BAD_REQUEST);
        let mentions_empty = body
            .get("error")
            .and_then(Value::as_str)
            .is_some_and(|msg| msg.contains("must not be empty"));
        assert!(mentions_empty, "got: {body}");
        h.shutdown(&runtime);
    }

    /// `GET /memory/{id}` for a well-formed id that does not exist
    /// answers 404 with an `error` field in the body.
    #[test]
    fn inspect_unknown_returns_404() {
        const UNKNOWN_MEMORY_URI: &str = "/memory/00000000-0000-7000-8000-000000000000";

        let runtime = rt();
        let h = Harness::new(&runtime);
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "GET", UNKNOWN_MEMORY_URI, None));
        assert_eq!(status, StatusCode::NOT_FOUND);
        assert!(body.get("error").is_some(), "got: {body}");
        h.shutdown(&runtime);
    }

    /// `GET /memory/{id}` with an id that is not a UUID answers 400.
    #[test]
    fn inspect_invalid_id_returns_400() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let request = call(h.router.clone(), "GET", "/memory/not-a-uuid", None);
        let (status, _) = runtime.block_on(request);
        assert_eq!(status, StatusCode::BAD_REQUEST);
        h.shutdown(&runtime);
    }

    /// `DELETE /memory/{id}` for a well-formed id that does not exist
    /// answers 404.
    #[test]
    fn forget_unknown_returns_404() {
        const UNKNOWN_MEMORY_URI: &str = "/memory/00000000-0000-7000-8000-000000000000";

        let runtime = rt();
        let h = Harness::new(&runtime);
        let request = call(h.router.clone(), "DELETE", UNKNOWN_MEMORY_URI, None);
        let (status, _) = runtime.block_on(request);
        assert_eq!(status, StatusCode::NOT_FOUND);
        h.shutdown(&runtime);
    }

    /// `POST /memory/consolidate` runs the cluster pass and answers with
    /// the report as JSON.
    ///
    /// An empty body leaves `ConsolidationScope` at its default
    /// (unbounded); a body with `window_days` is honored. Because the
    /// Harness's writer is spawned without a Steward,
    /// `abstractions_built` stays 0 even when `clusters_built` is
    /// nonzero — same posture as the daemon today.
    #[test]
    fn consolidate_endpoint_returns_report() {
        const REPORT_FIELDS: [&str; 6] = [
            "episodes_seen",
            "clusters_built",
            "episodes_clustered",
            "abstractions_built",
            "triples_built",
            "contradictions_found",
        ];

        let runtime = rt();
        let h = Harness::new(&runtime);
        let router = h.router.clone();
        runtime.block_on(async move {
            // Empty DB → all-zero report; structural assertion only.
            let (status, report) =
                call(router.clone(), "POST", "/memory/consolidate", None).await;
            assert_eq!(status, StatusCode::OK);
            for field in REPORT_FIELDS {
                assert!(
                    report[field].as_u64().is_some(),
                    "missing field {field}: {report}"
                );
            }
            assert_eq!(report["episodes_seen"], 0);
            assert_eq!(report["clusters_built"], 0);

            // Non-empty body with window_days → still 200; the shape
            // round-trips through ConsolidationScope's serde.
            let scoped = json!({ "window_days": 7 });
            let (scoped_status, _) =
                call(router, "POST", "/memory/consolidate", Some(scoped)).await;
            assert_eq!(scoped_status, StatusCode::OK);
        });
        h.shutdown(&runtime);
    }

    /// With bearer auth enabled, `POST /memory` answers 401 when the
    /// token is missing or wrong and 200 when it matches.
    #[test]
    fn auth_required_routes_reject_missing_token() {
        let runtime = rt();
        let h = Harness::new_with_auth(&runtime, Some("secret-xyz".into()));
        let router = h.router.clone();
        runtime.block_on(async move {
            // No Authorization header, then a wrong token: both → 401.
            for rejected in [None, Some("Bearer wrong-token")] {
                let (status, _) = call_with_auth(
                    router.clone(),
                    "POST",
                    "/memory",
                    Some(json!({ "content": "x" })),
                    rejected,
                )
                .await;
                assert_eq!(status, StatusCode::UNAUTHORIZED);
            }

            // Correct token → handler runs (200).
            let (status, body) = call_with_auth(
                router.clone(),
                "POST",
                "/memory",
                Some(json!({ "content": "authed" })),
                Some("Bearer secret-xyz"),
            )
            .await;
            assert_eq!(status, StatusCode::OK);
            assert!(body.get("memory_id").is_some());
        });
        h.shutdown(&runtime);
    }

    /// With bearer auth enabled, `GET /health` still answers 200 without
    /// credentials, so liveness probes keep working.
    #[test]
    fn health_endpoint_does_not_require_auth() {
        let runtime = rt();
        let h = Harness::new_with_auth(&runtime, Some("secret".into()));
        let request = call(h.router.clone(), "GET", "/health", None);
        let (status, _) = runtime.block_on(request);
        assert_eq!(status, StatusCode::OK);
        h.shutdown(&runtime);
    }

    /// A 401 from a bearer-protected route carries a
    /// `WWW-Authenticate: Bearer…` header, so a well-behaved client can
    /// tell which scheme to use.
    ///
    /// The request is built by hand because the `call()` helper returns
    /// only status + body and drops the response headers.
    #[test]
    fn auth_response_includes_www_authenticate_header() {
        let runtime = rt();
        let h = Harness::new_with_auth(&runtime, Some("secret".into()));
        let router = h.router.clone();
        runtime.block_on(async move {
            let payload = serde_json::to_vec(&json!({ "content": "x" })).unwrap();
            let request = Request::builder()
                .method("POST")
                .uri("/memory")
                .header("content-type", "application/json")
                .body(Body::from(payload))
                .unwrap();
            let response = router.oneshot(request).await.unwrap();
            assert_eq!(response.status(), StatusCode::UNAUTHORIZED);

            let challenge = response
                .headers()
                .get("www-authenticate")
                .and_then(|value| value.to_str().ok())
                .unwrap_or("");
            assert!(
                challenge.starts_with("Bearer"),
                "expected WWW-Authenticate: Bearer..., got: {challenge}"
            );
        });
        h.shutdown(&runtime);
    }

    // ---------------------------------------------------------------------
    // v0.8.0 P3: OIDC end-to-end. Spin up a fake IdP (wiremock) that
    // serves an OIDC discovery doc + JWKS, mint a token claiming
    // `solo_tenant = "default"`, and verify it routes through the
    // middleware + TenantExtractor + handler.
    // ---------------------------------------------------------------------

    /// Encode `bytes` as unpadded URL-safe base64 (used for the `k`
    /// value of the fake JWKS).
    fn base64_url_for_test(bytes: &[u8]) -> String {
        use base64::Engine as _;
        use base64::engine::general_purpose::URL_SAFE_NO_PAD;

        URL_SAFE_NO_PAD.encode(bytes)
    }

    /// Start a fake OIDC identity provider for these tests.
    ///
    /// The wiremock server serves a discovery document at
    /// `/.well-known/openid-configuration` and a JWKS at `/jwks` holding
    /// one symmetric HS256 key. Returns
    /// `(mock_server, discovery_url, secret, kid)`; the caller must keep
    /// the server value alive for as long as the IdP is needed.
    async fn spin_fake_idp() -> (wiremock::MockServer, String, Vec<u8>, &'static str) {
        use wiremock::matchers::{method, path};
        use wiremock::{Mock, MockServer, ResponseTemplate};

        const KID: &str = "http-test-kid";
        const DISCOVERY_PATH: &str = "/.well-known/openid-configuration";

        let server = MockServer::start().await;
        let base_uri = server.uri();
        let secret = b"http-test-secret-for-hmac-fixture".to_vec();

        // Discovery document pointing at the JWKS endpoint below.
        let discovery_doc = serde_json::json!({
            "issuer": base_uri,
            "jwks_uri": format!("{base_uri}/jwks"),
        });
        Mock::given(method("GET"))
            .and(path(DISCOVERY_PATH))
            .respond_with(ResponseTemplate::new(200).set_body_json(discovery_doc))
            .mount(&server)
            .await;

        // JWKS exposing the shared HMAC secret under `KID`.
        let jwks_doc = serde_json::json!({
            "keys": [
                {
                    "kty": "oct",
                    "kid": KID,
                    "alg": "HS256",
                    "k": base64_url_for_test(&secret),
                }
            ]
        });
        Mock::given(method("GET"))
            .and(path("/jwks"))
            .respond_with(ResponseTemplate::new(200).set_body_json(jwks_doc))
            .mount(&server)
            .await;

        let discovery_url = format!("{base_uri}{DISCOVERY_PATH}");
        (server, discovery_url, secret, KID)
    }

    /// Mint an HS256-signed JWT as the fake IdP would issue it.
    ///
    /// The token is issued by `server_uri` for subject `test-user-1`,
    /// targets `audience`, expires 600 seconds after the current time,
    /// and carries `tenant_claim` in the `solo_tenant` claim. The header
    /// names `kid`; the signature uses `secret`.
    fn mint_idp_token(
        server_uri: &str,
        kid: &str,
        secret: &[u8],
        tenant_claim: &str,
        audience: &str,
    ) -> String {
        use jsonwebtoken::{Algorithm, EncodingKey, Header};

        let issued_at = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs();
        let claims = serde_json::json!({
            "iss": server_uri,
            "sub": "test-user-1",
            "aud": audience,
            "exp": issued_at + 600,
            "iat": issued_at,
            "solo_tenant": tenant_claim,
        });

        let mut header = Header::new(Algorithm::HS256);
        header.kid = Some(kid.to_owned());

        let signing_key = EncodingKey::from_secret(secret);
        jsonwebtoken::encode(&header, &claims, &signing_key).expect("mint token")
    }

    /// A valid OIDC token whose `solo_tenant` claim names the harness's
    /// default tenant passes auth, and `POST /memory` returns a
    /// `memory_id`.
    #[test]
    fn http_oidc_accept_resolves_to_tenant_from_claim() {
        let runtime = rt();
        // `idp_server` stays bound until the end of the test so the
        // wiremock server keeps serving discovery + JWKS throughout.
        let (idp_server, discovery_url, secret, kid) = runtime.block_on(spin_fake_idp());
        let server_uri = idp_server.uri();

        let h = Harness::new_with_auth_config(
            &runtime,
            Some(crate::auth::AuthConfig::Oidc {
                discovery_url,
                audience: "test-audience".to_string(),
                tenant_claim_name: "solo_tenant".to_string(),
            }),
        );
        let router = h.router.clone();

        // Token claiming the harness's default tenant.
        let token = mint_idp_token(&server_uri, kid, &secret, "default", "test-audience");
        let authorization = format!("Bearer {token}");

        runtime.block_on(async move {
            // POST /memory with a valid OIDC token → handler runs and
            // returns a memory_id.
            let (status, body) = call_with_auth(
                router.clone(),
                "POST",
                "/memory",
                Some(json!({ "content": "oidc-routed content" })),
                Some(&authorization),
            )
            .await;
            assert_eq!(status, StatusCode::OK, "got body: {body}");
            assert!(body.get("memory_id").is_some(), "no memory_id in {body}");
        });
        h.shutdown(&runtime);
    }

    /// With OIDC auth configured, a request carrying no token — or a token
    /// that is not a JWT at all — must be answered with 401.
    #[test]
    fn http_oidc_reject_missing_token_returns_401() {
        let runtime = rt();
        // Keep the fake identity provider bound for the whole test.
        let (_idp, discovery_url, _secret, _kid) = runtime.block_on(spin_fake_idp());

        let harness = Harness::new_with_auth_config(
            &runtime,
            Some(crate::auth::AuthConfig::Oidc {
                discovery_url,
                audience: "test-audience".to_string(),
                tenant_claim_name: "solo_tenant".to_string(),
            }),
        );

        // Case 1: no Authorization header at all.
        let (status, _body) = runtime.block_on(call(
            harness.router.clone(),
            "POST",
            "/memory",
            Some(json!({ "content": "x" })),
        ));
        assert_eq!(status, StatusCode::UNAUTHORIZED);

        // Case 2: garbage token → 401 (invalid signature / not a JWT).
        let (status, _body) = runtime.block_on(call_with_auth(
            harness.router.clone(),
            "POST",
            "/memory",
            Some(json!({ "content": "x" })),
            Some("Bearer not-a-real-jwt"),
        ));
        assert_eq!(status, StatusCode::UNAUTHORIZED);

        harness.shutdown(&runtime);
    }

    /// Walks the four episode operations end to end: remember, recall,
    /// inspect, forget. Afterwards the forgotten row must still be
    /// inspectable (status `forgotten`) but absent from recall results.
    #[test]
    fn full_remember_recall_inspect_forget_round_trip() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async move {
            let search_request = json!({ "query": "round-trip content", "limit": 5 });

            // Remember: POST /memory hands back the new memory id.
            let (status, body) = call(
                router.clone(),
                "POST",
                "/memory",
                Some(json!({ "content": "round-trip content" })),
            )
            .await;
            assert_eq!(status, StatusCode::OK);
            let mid = body
                .get("memory_id")
                .and_then(Value::as_str)
                .unwrap()
                .to_string();
            let item_uri = format!("/memory/{mid}");

            // Recall: POST /memory/search — exact-match (StubEmbedder) returns the row.
            let (status, body) = call(
                router.clone(),
                "POST",
                "/memory/search",
                Some(search_request.clone()),
            )
            .await;
            assert_eq!(status, StatusCode::OK);
            let hits = body.get("hits").and_then(Value::as_array).unwrap();
            let found_content = hits
                .iter()
                .any(|hit| hit.get("content").and_then(Value::as_str) == Some("round-trip content"));
            assert!(found_content, "expected hit with content; got: {body}");

            // Inspect: the row is active.
            let (status, body) = call(router.clone(), "GET", &item_uri, None).await;
            assert_eq!(status, StatusCode::OK);
            assert_eq!(body.get("status").and_then(Value::as_str), Some("active"));

            // Forget: DELETE answers 204.
            let (status, _body) = call(router.clone(), "DELETE", &item_uri, None).await;
            assert_eq!(status, StatusCode::NO_CONTENT);

            // Inspect again — still readable but status='forgotten'.
            let (status, body) = call(router.clone(), "GET", &item_uri, None).await;
            assert_eq!(status, StatusCode::OK);
            assert_eq!(
                body.get("status").and_then(Value::as_str),
                Some("forgotten")
            );

            // Recall again — the forgotten row is excluded.
            let (status, body) = call(
                router.clone(),
                "POST",
                "/memory/search",
                Some(search_request),
            )
            .await;
            assert_eq!(status, StatusCode::OK);
            let hits = body.get("hits").and_then(Value::as_array).unwrap();
            let still_listed = hits
                .iter()
                .any(|hit| hit.get("memory_id").and_then(Value::as_str) == Some(mid.as_str()));
            assert!(
                !still_listed,
                "forgotten row should be excluded from recall: {body}"
            );
        });
        harness.shutdown(&runtime);
    }

    // Path 1 derived-layer endpoint tests (v0.4.0+). Wire-path only —
    // the actual content correctness is covered by solo-query::derived's
    // own tests (Sub-task A). These verify the HTTP shape: GET routing,
    // Query-string param parsing, JSON-array response body, validation
    // 400s for invalid inputs.

    /// `GET /memory/themes` against a freshly created harness answers 200
    /// with an empty JSON array.
    #[test]
    fn themes_endpoint_returns_empty_array_on_empty_db() {
        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "GET", "/memory/themes", None);
        let (status, body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::OK);
        assert!(body.is_array(), "expected array, got {body}");
        assert_eq!(body.as_array().unwrap().len(), 0);
        harness.shutdown(&runtime);
    }

    /// `window_days` and `limit` in the query string must be accepted by
    /// `GET /memory/themes`; the response is still a 200 with a JSON array.
    #[test]
    fn themes_endpoint_passes_through_query_params() {
        const URI: &str = "/memory/themes?window_days=7&limit=20";

        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "GET", URI, None);
        let (status, body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::OK);
        assert!(body.is_array(), "expected array, got {body}");
        harness.shutdown(&runtime);
    }

    /// `GET /memory/facts_about` without the required `subject` parameter is
    /// rejected with a client error.
    #[test]
    fn facts_about_endpoint_requires_subject() {
        let runtime = rt();
        let harness = Harness::new(&runtime);

        // The rejection comes from the query extractor for the missing
        // required field. Depending on the axum version it may surface as
        // 400 or 422, so both are accepted here.
        let acceptable = [StatusCode::BAD_REQUEST, StatusCode::UNPROCESSABLE_ENTITY];

        let request = call(harness.router.clone(), "GET", "/memory/facts_about", None);
        let (status, _body) = runtime.block_on(request);

        assert!(
            acceptable.contains(&status),
            "expected 400 or 422 for missing subject, got {status}"
        );
        harness.shutdown(&runtime);
    }

    /// A whitespace-only `subject` passes query parsing, so the 400 here is
    /// expected to come from the handler's own validation, with an error
    /// message that names the offending field.
    #[test]
    fn facts_about_endpoint_rejects_blank_subject() {
        // `%20%20` decodes to two spaces.
        const URI: &str = "/memory/facts_about?subject=%20%20";

        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "GET", URI, None);
        let (status, body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::BAD_REQUEST);
        let mentions_subject = matches!(
            body.get("error").and_then(Value::as_str),
            Some(text) if text.contains("subject")
        );
        assert!(
            mentions_subject,
            "expected error mentioning subject, got {body}"
        );
        harness.shutdown(&runtime);
    }

    /// Querying facts for a subject nothing has been recorded about answers
    /// 200 with an empty array.
    #[test]
    fn facts_about_endpoint_returns_empty_array_for_unknown_subject() {
        const URI: &str = "/memory/facts_about?subject=NobodyKnows";

        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "GET", URI, None);
        let (status, body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::OK);
        assert_eq!(body.as_array().unwrap().len(), 0);
        harness.shutdown(&runtime);
    }

    /// v0.5.1 P8: `?include_as_object=true` has to deserialize through the
    /// `Query<FactsAboutQuery>` extractor.
    ///
    /// No triples are seeded. The only thing under test is that the request
    /// reaches the handler and yields the ordinary 200 + array; a missing or
    /// mistyped struct field would instead be rejected by axum with 400/422
    /// before the handler runs. Sibling of
    /// `inspect_cluster_endpoint_passes_full_content_query_param`.
    #[test]
    fn facts_about_endpoint_parses_include_as_object_query_param() {
        const URI: &str = "/memory/facts_about?subject=Maya&include_as_object=true";

        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "GET", URI, None);
        let (status, body) = runtime.block_on(request);

        assert_eq!(
            status,
            StatusCode::OK,
            "expected 200 with include_as_object query param, got {status}"
        );
        assert!(body.is_array());
        harness.shutdown(&runtime);
    }

    /// An unknown cluster id yields 404 and the error text echoes the id.
    ///
    /// Exercises the `Error::NotFound` → `ApiError::from` → 404 mapping for
    /// `solo_query::inspect_cluster`, the same shape as the unknown-memory
    /// case of `GET /memory/{id}`.
    #[test]
    fn inspect_cluster_endpoint_unknown_id_returns_404() {
        const URI: &str = "/memory/clusters/no-such-cluster";

        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "GET", URI, None);
        let (status, body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::NOT_FOUND);
        let names_cluster = matches!(
            body.get("error").and_then(Value::as_str),
            Some(text) if text.contains("no-such-cluster")
        );
        assert!(
            names_cluster,
            "expected error mentioning cluster id, got {body}"
        );
        harness.shutdown(&runtime);
    }

    /// `?full_content=true` must parse cleanly on the cluster-inspect route.
    ///
    /// The cluster does not exist, so the expected answer is the handler's
    /// 404. A failure inside the `Query<InspectClusterQuery>` extractor would
    /// show up as 400/422 instead, which is what this test guards against.
    #[test]
    fn inspect_cluster_endpoint_passes_full_content_query_param() {
        const URI: &str = "/memory/clusters/missing?full_content=true";

        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "GET", URI, None);
        let (status, _body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::NOT_FOUND);
        harness.shutdown(&runtime);
    }

    /// `GET /memory/contradictions` against a freshly created harness answers
    /// 200 with an empty JSON array.
    #[test]
    fn contradictions_endpoint_returns_empty_array_on_empty_db() {
        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(
            harness.router.clone(),
            "GET",
            "/memory/contradictions",
            None,
        );
        let (status, body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::OK);
        assert!(body.is_array());
        assert_eq!(body.as_array().unwrap().len(), 0);
        harness.shutdown(&runtime);
    }

    /// With bearer auth enabled, each of the four derived-layer GET routes
    /// (themes, facts_about, contradictions, cluster inspect) must answer 401
    /// when no token is sent.
    #[test]
    fn derived_endpoints_require_bearer_when_auth_enabled() {
        const GUARDED_PATHS: [&str; 4] = [
            "/memory/themes",
            "/memory/facts_about?subject=Sam",
            "/memory/contradictions",
            "/memory/clusters/any-id",
        ];

        let runtime = rt();
        let harness = Harness::new_with_auth(&runtime, Some("secret-token".to_string()));

        // Shutdown timing: no router clone may outlive the loop. Each request
        // gets its own clone, which the call consumes, so nothing is still
        // holding the router when `shutdown` runs below.
        for path in GUARDED_PATHS {
            let request = call(harness.router.clone(), "GET", path, None);
            let (status, _) = runtime.block_on(request);
            assert_eq!(
                status,
                StatusCode::UNAUTHORIZED,
                "{path} should 401 without token"
            );
        }
        harness.shutdown(&runtime);
    }

    // ---- Document endpoints (v0.7.0 P6) ----
    //
    // Wire-path coverage. The `Harness` here uses
    // `WriterActor::spawn_full` without an embedder — same shape as the
    // existing handler tests. Ingest/search would fail at the writer
    // boundary with "writer has no embedder", but every other path
    // (404s, malformed ids, route shape, bearer auth gating, OpenAPI
    // documentation) is exercisable. Real end-to-end ingest→search
    // round-trip lives in `mcp_smoke.rs` where a real subprocess runs
    // with a fully-wired writer.

    /// `GET /memory/documents` against a freshly created harness answers 200
    /// with an empty JSON array.
    #[test]
    fn list_documents_endpoint_returns_empty_array_on_empty_db() {
        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "GET", "/memory/documents", None);
        let (status, body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::OK);
        assert!(body.is_array(), "expected array, got {body}");
        assert_eq!(body.as_array().unwrap().len(), 0);
        harness.shutdown(&runtime);
    }

    /// `limit`, `offset` and `include_forgotten` must all be accepted in the
    /// query string of `GET /memory/documents`.
    #[test]
    fn list_documents_endpoint_parses_query_params() {
        const URI: &str = "/memory/documents?limit=5&offset=0&include_forgotten=true";

        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "GET", URI, None);
        let (status, body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::OK);
        assert!(body.is_array());
        harness.shutdown(&runtime);
    }

    /// `POST /memory/documents` with an empty `path` is a 400 whose error
    /// message names the field.
    #[test]
    fn ingest_document_endpoint_rejects_empty_path() {
        let runtime = rt();
        let harness = Harness::new(&runtime);

        let payload = json!({ "path": "" });
        let request = call(
            harness.router.clone(),
            "POST",
            "/memory/documents",
            Some(payload),
        );
        let (status, body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::BAD_REQUEST);
        let mentions_path = matches!(
            body.get("error").and_then(Value::as_str),
            Some(text) if text.contains("path")
        );
        assert!(mentions_path, "expected error mentioning path, got {body}");
        harness.shutdown(&runtime);
    }

    /// `POST /memory/documents/search` with a whitespace-only `query` is a
    /// 400. The error text is accepted if it contains either
    /// "must not be empty" or "doc_search".
    #[test]
    fn search_docs_endpoint_rejects_empty_query() {
        let runtime = rt();
        let harness = Harness::new(&runtime);

        let payload = json!({ "query": "   " });
        let request = call(
            harness.router.clone(),
            "POST",
            "/memory/documents/search",
            Some(payload),
        );
        let (status, body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::BAD_REQUEST);
        let explains_rejection = match body.get("error").and_then(Value::as_str) {
            Some(text) => text.contains("must not be empty") || text.contains("doc_search"),
            None => false,
        };
        assert!(
            explains_rejection,
            "expected error mentioning empty query, got {body}"
        );
        harness.shutdown(&runtime);
    }

    /// Inspecting a well-formed but non-existent document id yields 404 with
    /// an `error` field in the body.
    #[test]
    fn inspect_document_endpoint_unknown_id_returns_404() {
        const URI: &str = "/memory/documents/00000000-0000-7000-8000-000000000000";

        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "GET", URI, None);
        let (status, body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::NOT_FOUND);
        assert!(body.get("error").is_some(), "got: {body}");
        harness.shutdown(&runtime);
    }

    /// A document id that is not UUID-shaped is rejected with 400 on the
    /// inspect route.
    #[test]
    fn inspect_document_endpoint_rejects_malformed_id() {
        const URI: &str = "/memory/documents/not-a-uuid";

        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "GET", URI, None);
        let (status, _body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::BAD_REQUEST);
        harness.shutdown(&runtime);
    }

    /// Deleting a well-formed document id with no backing row yields 404:
    /// the writer's `forget_document` reports `Error::NotFound`, which
    /// `ApiError::from` maps to that status.
    #[test]
    fn forget_document_endpoint_unknown_id_returns_404() {
        const URI: &str = "/memory/documents/00000000-0000-7000-8000-000000000000";

        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "DELETE", URI, None);
        let (status, _body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::NOT_FOUND);
        harness.shutdown(&runtime);
    }

    /// A document id that is not UUID-shaped is rejected with 400 on the
    /// delete route.
    #[test]
    fn forget_document_endpoint_rejects_malformed_id() {
        const URI: &str = "/memory/documents/not-a-uuid";

        let runtime = rt();
        let harness = Harness::new(&runtime);

        let request = call(harness.router.clone(), "DELETE", URI, None);
        let (status, _body) = runtime.block_on(request);

        assert_eq!(status, StatusCode::BAD_REQUEST);
        harness.shutdown(&runtime);
    }

    /// With bearer auth enabled, all five document routes must answer 401
    /// when no token is sent. Counterpart of
    /// `derived_endpoints_require_bearer_when_auth_enabled`.
    #[test]
    fn document_endpoints_require_bearer_when_auth_enabled() {
        const DOC_URI: &str = "/memory/documents/00000000-0000-7000-8000-000000000000";

        let runtime = rt();
        let harness = Harness::new_with_auth(&runtime, Some("doc-secret".to_string()));

        // (method, path, optional JSON body) for each guarded route.
        let cases: [(&str, &str, Option<Value>); 5] = [
            ("POST", "/memory/documents", Some(json!({ "path": "/x" }))),
            ("GET", "/memory/documents", None),
            (
                "POST",
                "/memory/documents/search",
                Some(json!({ "query": "x" })),
            ),
            ("GET", DOC_URI, None),
            ("DELETE", DOC_URI, None),
        ];

        for (method, path, body) in cases {
            // A fresh router clone per request; the call consumes it.
            let request = call(harness.router.clone(), method, path, body);
            let (status, _) = runtime.block_on(request);
            assert_eq!(
                status,
                StatusCode::UNAUTHORIZED,
                "{method} {path} should 401 without token"
            );
        }
        harness.shutdown(&runtime);
    }

    /// Sanity check for the positive auth path: with the right bearer token
    /// a document request gets past auth and into the handler.
    ///
    /// Only the list and inspect routes are exercised, and their exact
    /// statuses are asserted: list → 200 on an empty DB, inspect of an
    /// unknown id → 404. Neither status could come from the auth layer
    /// rejecting the request.
    #[test]
    fn document_endpoints_accept_correct_bearer_token() {
        const AUTH_HEADER: &str = "Bearer doc-secret";

        let runtime = rt();
        let harness = Harness::new_with_auth(&runtime, Some("doc-secret".to_string()));

        // GET /memory/documents → 200 (auth passes).
        let (status, _) = runtime.block_on(call_with_auth(
            harness.router.clone(),
            "GET",
            "/memory/documents",
            None,
            Some(AUTH_HEADER),
        ));
        assert_eq!(status, StatusCode::OK);

        // GET /memory/documents/<unknown> → 404 (auth passes).
        let (status, _) = runtime.block_on(call_with_auth(
            harness.router.clone(),
            "GET",
            "/memory/documents/00000000-0000-7000-8000-000000000000",
            None,
            Some(AUTH_HEADER),
        ));
        assert_eq!(status, StatusCode::NOT_FOUND);

        harness.shutdown(&runtime);
    }

    // ---------------------------------------------------------------------
    // v0.8.0 P2: tenant header extractor tests
    // ---------------------------------------------------------------------

    /// An explicit `X-Solo-Tenant: default` header resolves to the default
    /// tenant — the only tenant the test harness wires into the registry.
    #[test]
    fn tenant_header_default_resolves() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();

        let status = runtime.block_on(async {
            let request = Request::builder()
                .method("GET")
                .uri("/memory/00000000-0000-7000-8000-000000000000")
                .header("x-solo-tenant", "default")
                .body(Body::empty())
                .unwrap();
            let response = router.oneshot(request).await.expect("oneshot");
            let status = response.status();
            // Drain the body so the response is fully consumed.
            let _drained = response.into_body().collect().await.unwrap().to_bytes();
            status
        });

        // The proof point: a 404 here is the routed "no such id" answer from
        // inspect_handler, not a 400 complaining about the tenant header.
        assert_eq!(status, StatusCode::NOT_FOUND);
        harness.shutdown(&runtime);
    }

    /// A tenant id in an invalid format (`X-Solo-Tenant: UPPER`) is rejected
    /// with 400 and an error message mentioning "tenant" or "invalid".
    #[test]
    fn tenant_header_invalid_returns_400() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();

        let (status, body) = runtime.block_on(async {
            let request = Request::builder()
                .method("GET")
                .uri("/memory/00000000-0000-7000-8000-000000000000")
                .header("x-solo-tenant", "UPPER")
                .body(Body::empty())
                .unwrap();
            let response = router.oneshot(request).await.expect("oneshot");
            let status = response.status();
            let raw = response.into_body().collect().await.unwrap().to_bytes();
            // A body that is not JSON is treated as `null` rather than
            // failing here; the message assertion below catches it.
            let parsed: Value = serde_json::from_slice(&raw).unwrap_or(Value::Null);
            (status, parsed)
        });

        assert_eq!(status, StatusCode::BAD_REQUEST);
        let msg = body.get("error").and_then(Value::as_str).unwrap_or("");
        let lowered = msg.to_lowercase();
        assert!(
            lowered.contains("tenant") || lowered.contains("invalid"),
            "error must mention tenant/invalid: {msg}"
        );
        harness.shutdown(&runtime);
    }

    /// A well-formed tenant id that was never registered
    /// (`X-Solo-Tenant: never-registered`) yields 404.
    #[test]
    fn tenant_header_unknown_returns_404() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();

        let status = runtime.block_on(async {
            let request = Request::builder()
                .method("GET")
                .uri("/memory/00000000-0000-7000-8000-000000000000")
                .header("x-solo-tenant", "never-registered")
                .body(Body::empty())
                .unwrap();
            let response = router.oneshot(request).await.expect("oneshot");
            let status = response.status();
            // Drain the body so the response is fully consumed.
            let _drained = response.into_body().collect().await.unwrap().to_bytes();
            status
        });

        assert_eq!(status, StatusCode::NOT_FOUND);
        harness.shutdown(&runtime);
    }

    /// Without an `X-Solo-Tenant` header the request falls back to
    /// `state.default_tenant`. It should therefore reach `inspect_handler`
    /// and get the ordinary unknown-id 404, not a tenant-routing error.
    #[test]
    fn tenant_header_missing_defaults_to_state_default_tenant() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();

        let status = runtime.block_on(async {
            // Deliberately no tenant header on this request.
            let request = Request::builder()
                .method("GET")
                .uri("/memory/00000000-0000-7000-8000-000000000000")
                .body(Body::empty())
                .unwrap();
            let response = router.oneshot(request).await.expect("oneshot");
            let status = response.status();
            // Drain the body so the response is fully consumed.
            let _drained = response.into_body().collect().await.unwrap().to_bytes();
            status
        });

        assert_eq!(status, StatusCode::NOT_FOUND);
        harness.shutdown(&runtime);
    }

    // ---------------------------------------------------------------------
    // v0.9.x: GET /v1/graph/expand
    //
    // Seeds tables directly via the Harness's side connection and walks
    // the four expansion kinds. The Harness is single-tenant (default);
    // the routing-isolation case is already covered by the
    // `tenant_header_*` tests above (an `X-Solo-Tenant: never-registered`
    // header against the same node_id surfaces 404 from the registry,
    // proving cross-tenant lookups can't bleed).
    // ---------------------------------------------------------------------

    /// Seed a single `episodes` row straight through `conn`.
    ///
    /// `ts_ms` is used for the row's timestamp as well as its created/updated
    /// columns; the remaining columns get fixed test values. Returns the
    /// connection's last-insert rowid so callers can point
    /// `triples.source_episode_id` at the new row. Panics if the insert fails.
    fn seed_episode(
        conn: &rusqlite::Connection,
        memory_id: &str,
        ts_ms: i64,
        content: &str,
    ) -> i64 {
        let insert_sql = "INSERT INTO episodes
                (memory_id, ts_ms, source_type, content,
                 encoding_context_json, tier, status,
                 confidence, strength, salience,
                 created_at_ms, updated_at_ms)
                VALUES (?1, ?2, 'user_message', ?3,
                        '{}', 'hot', 'active',
                        1.0, 0.5, 0.5, ?2, ?2)";
        conn.execute(insert_sql, rusqlite::params![memory_id, ts_ms, content])
            .expect("seed episode");
        conn.last_insert_rowid()
    }

    /// Seed one `clusters` row with a fixed coherence of 0.5 and the given
    /// creation timestamp. Panics if the insert fails.
    fn seed_cluster_row(conn: &rusqlite::Connection, cluster_id: &str, created_at_ms: i64) {
        let insert_sql = "INSERT INTO clusters (cluster_id, coherence, created_at_ms)
                  VALUES (?1, 0.5, ?2)";
        conn.execute(insert_sql, rusqlite::params![cluster_id, created_at_ms])
            .expect("seed cluster");
    }

    /// Link an episode to a cluster by inserting a `cluster_episodes` row.
    /// Panics if the insert fails.
    fn seed_cluster_member(conn: &rusqlite::Connection, cluster_id: &str, memory_id: &str) {
        let insert_sql = "INSERT INTO cluster_episodes (cluster_id, memory_id) VALUES (?1, ?2)";
        conn.execute(insert_sql, rusqlite::params![cluster_id, memory_id])
            .expect("seed cluster_episodes");
    }

    /// Seed one active `documents` row with zero chunks.
    ///
    /// The `source` column is derived from `title` as `/tmp/<title>.txt`, and
    /// `doc_id` is reused as the `content_hash` value. Panics if the insert
    /// fails.
    fn seed_document_row(conn: &rusqlite::Connection, doc_id: &str, title: &str) {
        let source = format!("/tmp/{title}.txt");
        let insert_sql = "INSERT INTO documents
                (doc_id, source, title, mime_type, ingested_at_ms,
                 modified_at_ms, status, chunk_count, content_hash, byte_size)
                VALUES (?1, ?2, ?3, 'text/plain', 0, NULL,
                        'active', 0, ?1, NULL)";
        conn.execute(insert_sql, rusqlite::params![doc_id, source, title])
            .expect("seed doc");
    }

    /// Seed one `document_chunks` row belonging to `doc_id`.
    ///
    /// `start_offset` is fixed at 0 and `end_offset` is the byte length of
    /// `content`; `token_count` is fixed at 1. Panics if the insert fails.
    fn seed_chunk_row(
        conn: &rusqlite::Connection,
        chunk_id: &str,
        doc_id: &str,
        chunk_index: i64,
        content: &str,
    ) {
        let end_offset = content.len() as i64;
        let insert_sql = "INSERT INTO document_chunks
                (chunk_id, doc_id, chunk_index, content,
                 token_count, start_offset, end_offset, created_at_ms)
                VALUES (?1, ?2, ?3, ?4, 1, 0, ?5, 0)";
        conn.execute(
            insert_sql,
            rusqlite::params![chunk_id, doc_id, chunk_index, content, end_offset],
        )
        .expect("seed chunk");
    }

    /// Seed one active `triples` row with a literal object.
    ///
    /// `source_episode_rowid` is stored in `source_episode_id`; passing `None`
    /// binds NULL there. Confidence is fixed at 0.9 and all timestamps at 0.
    /// Panics if the insert fails.
    fn seed_triple_row(
        conn: &rusqlite::Connection,
        triple_id: &str,
        subject: &str,
        predicate: &str,
        object: &str,
        source_episode_rowid: Option<i64>,
    ) {
        let insert_sql = "INSERT INTO triples
                 (triple_id, subject_id, predicate, object_id, object_kind,
                  valid_from_ms, valid_to_ms, confidence, provenance_json,
                  status, created_at_ms, updated_at_ms, source_episode_id)
                 VALUES (?1, ?2, ?3, ?4, 'literal', 0, NULL, 0.9, '{}',
                         'active', 0, 0, ?5)";
        conn.execute(
            insert_sql,
            rusqlite::params![triple_id, subject, predicate, object, source_episode_rowid],
        )
        .expect("seed triple");
    }

    /// Seed one `semantic_abstractions` row (a cluster's LLM summary) tied to
    /// `cluster_id`. The cluster-inspect test relies on this to check the
    /// abstraction concat path. Panics if the insert fails.
    fn seed_abstraction_row(
        conn: &rusqlite::Connection,
        abstraction_id: &str,
        cluster_id: &str,
        content: &str,
    ) {
        let insert_sql = "INSERT INTO semantic_abstractions
                 (abstraction_id, cluster_id, content, provenance_json,
                  confidence, created_at_ms)
                 VALUES (?1, ?2, ?3, '{}', 0.9, 0)";
        conn.execute(
            insert_sql,
            rusqlite::params![abstraction_id, cluster_id, content],
        )
        .expect("seed abstraction");
    }

    /// Percent-encode `node_id` for use as a URI query-parameter value.
    ///
    /// Every byte outside the RFC 3986 "unreserved" set (ASCII letters,
    /// digits, `-`, `.`, `_`, `~`) is written as `%XX` with uppercase hex.
    /// Encoding works on the UTF-8 bytes, so non-ASCII ids still produce a
    /// valid URI. Besides the delimiters the graph tests rely on (`:`, space,
    /// `&`, `+`, `?`, `#`), this also escapes `%` itself — left raw, a literal
    /// `%` would be read as the start of an escape sequence when the query
    /// string is decoded, and the id would not round-trip.
    fn percent_encode_node_id(node_id: &str) -> String {
        const HEX: &[u8; 16] = b"0123456789ABCDEF";

        // Lower bound only: every escaped byte grows the output by two.
        let mut out = String::with_capacity(node_id.len());
        for byte in node_id.bytes() {
            let unreserved =
                byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~');
            if unreserved {
                out.push(char::from(byte));
            } else {
                out.push('%');
                out.push(char::from(HEX[usize::from(byte >> 4)]));
                out.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
        out
    }

    /// Build the `/v1/graph/expand` request URI for `node_id` and `kind`.
    /// `node_id` is percent-encoded; `kind` is appended as given.
    fn graph_uri(node_id: &str, kind: &str) -> String {
        let mut uri = String::from("/v1/graph/expand?node_id=");
        uri.push_str(&percent_encode_node_id(node_id));
        uri.push_str("&kind=");
        uri.push_str(kind);
        uri
    }

    /// Same as `graph_uri`, with a trailing `&limit=<limit>` parameter.
    fn graph_uri_with_limit(node_id: &str, kind: &str, limit: u32) -> String {
        let mut uri = String::from("/v1/graph/expand?node_id=");
        uri.push_str(&percent_encode_node_id(node_id));
        uri.push_str("&kind=");
        uri.push_str(kind);
        uri.push_str("&limit=");
        uri.push_str(&limit.to_string());
        uri
    }

    /// Expanding `cluster_member` from an episode node returns the cluster it
    /// belongs to, plus one edge from the episode to that cluster.
    #[test]
    fn expand_cluster_member_from_episode_returns_clusters() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let memory_id = "11111111-1111-7000-8000-000000000001";

        // Seed one episode that is a member of one cluster, then release the
        // side connection before issuing the HTTP request.
        let conn = harness.open_db();
        seed_episode(&conn, memory_id, 100, "ep content");
        seed_cluster_row(&conn, "cl-a", 200);
        seed_cluster_member(&conn, "cl-a", memory_id);
        drop(conn);

        let node_id = format!("ep:{memory_id}");
        let uri = graph_uri(&node_id, "cluster_member");
        let (status, body) = runtime.block_on(call(harness.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");

        let nodes = body
            .get("nodes")
            .and_then(Value::as_array)
            .expect("nodes array");
        let edges = body
            .get("edges")
            .and_then(Value::as_array)
            .expect("edges array");

        assert_eq!(nodes.len(), 1, "{body}");
        let cluster_node = &nodes[0];
        assert_eq!(cluster_node["id"], "cl:cl-a");
        assert_eq!(cluster_node["kind"], "cluster");

        assert_eq!(edges.len(), 1);
        let membership = &edges[0];
        assert_eq!(membership["source"], node_id);
        assert_eq!(membership["target"], "cl:cl-a");
        assert_eq!(membership["kind"], "cluster_member");

        harness.shutdown(&runtime);
    }

    /// Expanding `cluster_member` from a cluster node returns its episodes and
    /// honours `limit`: five members are seeded, three are requested, and
    /// three come back.
    #[test]
    fn expand_cluster_member_from_cluster_returns_episodes() {
        let runtime = rt();
        let harness = Harness::new(&runtime);

        // Seed a cluster with five member episodes, then release the side
        // connection before issuing the HTTP request.
        let conn = harness.open_db();
        seed_cluster_row(&conn, "cl-multi", 500);
        for i in 0..5i64 {
            let mid = format!("2222{i}222-2222-7000-8000-000000000001");
            let text = format!("content {i}");
            seed_episode(&conn, &mid, 100 + i, &text);
            seed_cluster_member(&conn, "cl-multi", &mid);
        }
        drop(conn);

        let uri = graph_uri_with_limit("cl:cl-multi", "cluster_member", 3);
        let (status, body) = runtime.block_on(call(harness.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");

        let nodes = body["nodes"].as_array().unwrap();
        let edges = body["edges"].as_array().unwrap();
        assert_eq!(nodes.len(), 3, "limit honored: {body}");
        assert_eq!(edges.len(), 3);
        for node in nodes {
            assert_eq!(node["kind"], "episode");
        }

        harness.shutdown(&runtime);
    }

    /// Expanding `document_chunk` from a document node returns its chunks
    /// in ascending `chunk_index` order, regardless of insertion order,
    /// with every edge tagged `document_chunk`.
    #[test]
    fn expand_document_chunk_from_document_returns_chunks() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let doc_id = "33333333-3333-7000-8000-000000000001";
        {
            let db = h.open_db();
            seed_document_row(&db, doc_id, "doc A");
            // Rows go in out of order on purpose: the response below can
            // only come back sorted if the handler orders by chunk_index.
            for (chunk_id, index, text) in [
                ("c2", 2, "chunk 2 text"),
                ("c0", 0, "chunk 0 text"),
                ("c1", 1, "chunk 1 text"),
                ("c3", 3, "chunk 3 text"),
            ] {
                seed_chunk_row(&db, chunk_id, doc_id, index, text);
            }
        }
        let node_id = format!("doc:{doc_id}");
        let uri = graph_uri(&node_id, "document_chunk");
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let nodes = body["nodes"].as_array().unwrap();
        let edges = body["edges"].as_array().unwrap();
        assert_eq!(nodes.len(), 4);
        assert_eq!(edges.len(), 4);
        // Position in the response must match chunk_index.
        let expected_order = ["chunk:c0", "chunk:c1", "chunk:c2", "chunk:c3"];
        for (position, expected_id) in expected_order.into_iter().enumerate() {
            assert_eq!(nodes[position]["id"], expected_id);
        }
        edges
            .iter()
            .for_each(|edge| assert_eq!(edge["kind"], "document_chunk"));
        h.shutdown(&runtime);
    }

    /// Expanding `document_chunk` from a chunk node returns exactly one
    /// node (the owning document) and one edge running from the chunk to
    /// that document.
    #[test]
    fn expand_document_chunk_from_chunk_returns_parent_document() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let doc_id = "44444444-4444-7000-8000-000000000001";
        {
            let db = h.open_db();
            seed_document_row(&db, doc_id, "parent doc");
            seed_chunk_row(&db, "c-orphan", doc_id, 0, "chunk content");
        }
        let chunk_node = "chunk:c-orphan";
        let parent_node = format!("doc:{doc_id}");
        let uri = graph_uri(chunk_node, "document_chunk");
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let nodes = body["nodes"].as_array().unwrap();
        let edges = body["edges"].as_array().unwrap();
        assert_eq!(nodes.len(), 1);
        assert_eq!(edges.len(), 1);
        assert_eq!(nodes[0]["id"], parent_node);
        assert_eq!(edges[0]["source"], chunk_node);
        assert_eq!(edges[0]["target"], parent_node);
        h.shutdown(&runtime);
    }

    /// Expanding `triple` from an episode returns the entities named by
    /// the triples sourced from it: two triples give four distinct entity
    /// nodes and two `triple` edges, each carrying a string `predicate`.
    #[test]
    fn expand_triple_from_episode_returns_entities() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let memory_id = "55555555-5555-7000-8000-000000000001";
        {
            let db = h.open_db();
            let rowid = seed_episode(&db, memory_id, 100, "alice works at anthropic");
            // The two triples share no endpoint, so four entities in total
            // (Alice, Anthropic, Bob, NYC).
            seed_triple_row(&db, "t1", "Alice", "works_at", "Anthropic", Some(rowid));
            seed_triple_row(&db, "t2", "Bob", "lives_in", "NYC", Some(rowid));
        }
        let node_id = format!("ep:{memory_id}");
        let uri = graph_uri(&node_id, "triple");
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let nodes = body["nodes"].as_array().unwrap();
        let edges = body["edges"].as_array().unwrap();
        assert_eq!(nodes.len(), 4, "expected 4 unique entity nodes: {body}");
        assert_eq!(edges.len(), 2);
        let mut ids = std::collections::HashSet::new();
        for node in nodes {
            ids.insert(node["id"].as_str().unwrap());
        }
        for expected in ["ent:Alice", "ent:Anthropic", "ent:Bob", "ent:NYC"] {
            assert!(ids.contains(expected), "missing {expected} in {body}");
        }
        edges.iter().for_each(|edge| {
            assert_eq!(edge["kind"], "triple");
            assert!(edge["predicate"].is_string(), "predicate set: {body}");
        });
        h.shutdown(&runtime);
    }

    /// Expanding `triple` from an entity returns the episodes whose
    /// triples mention that entity as subject or object. A triple without
    /// a source episode contributes nothing, and every edge originates at
    /// the entity node.
    #[test]
    fn expand_triple_from_entity_returns_episodes() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        {
            let db = h.open_db();
            let [r1, r2, r3] = [
                ("66666666-6666-7000-8000-000000000001", 100, "alice ep one"),
                ("66666666-6666-7000-8000-000000000002", 200, "alice ep two"),
                ("66666666-6666-7000-8000-000000000003", 300, "alice ep three"),
            ]
            .map(|(id, ts, text)| seed_episode(&db, id, ts, text));
            // Alice appears as subject twice and as object once.
            seed_triple_row(&db, "t1", "Alice", "p", "Bob", Some(r1));
            seed_triple_row(&db, "t2", "Carol", "p", "Alice", Some(r2));
            seed_triple_row(&db, "t3", "Alice", "q", "Dave", Some(r3));
            // Source-less triple: the IS NOT NULL filter must drop it.
            seed_triple_row(&db, "t-orphan", "Alice", "p", "Eve", None);
        }
        let uri = graph_uri("ent:Alice", "triple");
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let nodes = body["nodes"].as_array().unwrap();
        let edges = body["edges"].as_array().unwrap();
        assert_eq!(nodes.len(), 3, "expected 3 episodes: {body}");
        assert_eq!(edges.len(), 3);
        nodes
            .iter()
            .for_each(|node| assert_eq!(node["kind"], "episode"));
        edges.iter().for_each(|edge| {
            assert_eq!(edge["source"], "ent:Alice");
            assert_eq!(edge["kind"], "triple");
        });
        h.shutdown(&runtime);
    }

    /// Expanding `semantic` from an episode returns at least one peer
    /// episode, never the source itself, and every edge is tagged
    /// `semantic` with a numeric `weight`.
    #[test]
    fn expand_semantic_from_episode_returns_similar() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        // Seed three episodes via the writer-actor so they get embedded
        // + inserted into HNSW. StubEmbedder is deterministic: identical
        // content → identical vector → cos_distance = 0. So we use
        // distinct strings, then expand from one of them and assert at
        // least one similar peer comes back.
        runtime.block_on(async {
            let mid1 = post_remember(h.router.clone(), "alpha alpha alpha").await;
            let _mid2 = post_remember(h.router.clone(), "beta beta beta").await;
            let _mid3 = post_remember(h.router.clone(), "gamma gamma gamma").await;
            // Expand from mid1.
            let source_node = format!("ep:{mid1}");
            let (status, body) = call(
                h.router.clone(),
                "GET",
                &graph_uri_with_limit(&source_node, "semantic", 5),
                None,
            )
            .await;
            assert_eq!(status, StatusCode::OK, "body: {body}");
            let nodes = body["nodes"].as_array().unwrap();
            let edges = body["edges"].as_array().unwrap();
            // Guard against a vacuous pass: with an empty response the
            // per-node and per-edge loops below would assert nothing.
            assert!(
                !nodes.is_empty(),
                "expected at least one similar peer: {body}"
            );
            // Must NOT include the source.
            for n in nodes {
                assert_ne!(
                    n["id"].as_str().unwrap(),
                    source_node,
                    "self must be excluded: {body}"
                );
            }
            // Edges must be tagged semantic with a numeric weight.
            for e in edges {
                assert_eq!(e["kind"], "semantic");
                assert!(e["weight"].is_number(), "weight set: {body}");
            }
        });
        h.shutdown(&runtime);
    }

    /// Test helper: stores `content` through `POST /memory`, asserts the
    /// call returned 200, and hands back the `memory_id` string from the
    /// response body. Panics if the body has no string `memory_id`.
    async fn post_remember(router: axum::Router, content: &str) -> String {
        let payload = json!({ "content": content });
        let (status, body) = call(router, "POST", "/memory", Some(payload)).await;
        assert_eq!(status, StatusCode::OK, "post failed: {body}");
        let memory_id = body["memory_id"].as_str().unwrap();
        memory_id.to_owned()
    }

    /// An unrecognised `kind` query value is rejected with a client error.
    /// Either 400 or 422 is accepted, so the test name's "400" is
    /// shorthand for both.
    #[test]
    fn expand_400_on_invalid_kind() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let (status, body) = runtime.block_on(call(
            h.router.clone(),
            "GET",
            "/v1/graph/expand?node_id=ep:any&kind=banana",
            None,
        ));
        // axum's Query extractor rejects unknown enum value with 400/422.
        // The body is included in the failure message, matching the
        // diagnostics of the sibling tests.
        assert!(
            status == StatusCode::BAD_REQUEST || status == StatusCode::UNPROCESSABLE_ENTITY,
            "expected 400/422 for bad kind, got {status}; body: {body}"
        );
        h.shutdown(&runtime);
    }

    /// Requesting `kind=semantic` from a cluster node is an invalid
    /// combination: the endpoint answers 400 and the `error` string
    /// explains that semantic expansion is only valid for episodes.
    #[test]
    fn expand_400_on_invalid_node_for_kind() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        // Cluster source + semantic kind is the rejected pairing.
        let uri = graph_uri("cl:doesnt-matter", "semantic");
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::BAD_REQUEST);
        let explains_rule = matches!(
            body["error"].as_str(),
            Some(msg) if msg.contains("semantic only valid for episode")
        );
        assert!(explains_rule, "got: {body}");
        h.shutdown(&runtime);
    }

    /// Expanding from a well-formed episode id that was never seeded
    /// answers 404.
    #[test]
    fn expand_404_on_missing_node_id() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let unknown_episode = "ep:99999999-9999-7000-8000-000000000999";
        let uri = graph_uri(unknown_episode, "cluster_member");
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::NOT_FOUND, "{body}");
        h.shutdown(&runtime);
    }

    /// A `limit` above 100 is clamped rather than rejected: with 150
    /// cluster members seeded and `limit=999` requested, the response is
    /// 200 and carries exactly 100 nodes.
    #[test]
    fn expand_limit_clamped_at_100() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        {
            // More members than the clamp so the cap is observable.
            let db = h.open_db();
            seed_cluster_row(&db, "cl-huge", 1_000);
            for i in 0..150 {
                let episode_id = format!("77777777-7777-7000-8000-{i:012}");
                let content = format!("content {i}");
                seed_episode(&db, &episode_id, 100 + i as i64, &content);
                seed_cluster_member(&db, "cl-huge", &episode_id);
            }
        }
        let uri = graph_uri_with_limit("cl:cl-huge", "cluster_member", 999);
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let nodes = body["nodes"].as_array().unwrap();
        assert_eq!(
            nodes.len(),
            100,
            "limit must be silently clamped to 100, got {}",
            nodes.len()
        );
        h.shutdown(&runtime);
    }

    /// A `node_id` with no recognised prefix is rejected with 400, and the
    /// `error` string states what a node id must look like.
    #[test]
    fn expand_bad_node_id_prefix_returns_400() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let uri = "/v1/graph/expand?node_id=garbage&kind=cluster_member";
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", uri, None));
        assert_eq!(status, StatusCode::BAD_REQUEST);
        let describes_format = matches!(
            body["error"].as_str(),
            Some(msg) if msg.contains("node_id must be")
        );
        assert!(describes_format, "got: {body}");
        h.shutdown(&runtime);
    }

    /// A request carrying an `x-solo-tenant` header for a tenant that was
    /// never registered answers 404, even though the requested episode
    /// exists in the default tenant.
    #[test]
    fn expand_respects_tenant_scoping_via_unknown_tenant_header() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        // The episode is real in the default tenant. If scoping leaked,
        // the lookup below would succeed with 200 despite the bogus
        // tenant header.
        let memory_id = "88888888-8888-7000-8000-000000000001";
        {
            let db = h.open_db();
            seed_episode(&db, memory_id, 100, "scoped");
            seed_cluster_row(&db, "cl-scoped", 200);
            seed_cluster_member(&db, "cl-scoped", memory_id);
        }
        let node_id = format!("ep:{memory_id}");
        let request = Request::builder()
            .method("GET")
            .uri(graph_uri(&node_id, "cluster_member"))
            .header("x-solo-tenant", "never-registered-tenant")
            .body(Body::empty())
            .unwrap();
        let router = h.router.clone();
        let status = runtime.block_on(async {
            let response = router.oneshot(request).await.expect("oneshot");
            let code = response.status();
            // Read the body to completion; its content is not inspected.
            let _drained = response.into_body().collect().await.unwrap().to_bytes();
            code
        });
        // The tenant lookup is expected to fail before the handler can
        // resolve node ids against the wrong tenant's DB, so cross-tenant
        // lookups cannot slip through this endpoint.
        assert_eq!(status, StatusCode::NOT_FOUND);
        h.shutdown(&runtime);
    }

    /// With a bearer token configured, `/v1/graph/expand` answers 401 to
    /// a request without credentials and lets a correctly authenticated
    /// request through to the handler (observed as 404 for an unknown
    /// node).
    #[test]
    fn expand_respects_auth_when_enabled() {
        let runtime = rt();
        let h = Harness::new_with_auth(&runtime, Some("graph-secret".into()));

        // Anonymous call: rejected before any node lookup.
        let anon_uri = graph_uri("ep:any", "cluster_member");
        let (anon_status, _) =
            runtime.block_on(call(h.router.clone(), "GET", &anon_uri, None));
        assert_eq!(anon_status, StatusCode::UNAUTHORIZED);

        // Correct token: the handler runs and reports the missing node,
        // so the answer is 404 rather than 401.
        let authed_uri = graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member");
        let (authed_status, _) = runtime.block_on(call_with_auth(
            h.router.clone(),
            "GET",
            &authed_uri,
            None,
            Some("Bearer graph-secret"),
        ));
        assert_eq!(authed_status, StatusCode::NOT_FOUND);
        h.shutdown(&runtime);
    }

    /// Without auth configured, an unauthenticated expand request reaches
    /// the handler: the 404 for an unknown node shows it was not turned
    /// away earlier.
    #[test]
    fn expand_works_when_auth_none() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let uri = graph_uri("ep:99999999-9999-7000-8000-000000000999", "cluster_member");
        let (status, _) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::NOT_FOUND);
        h.shutdown(&runtime);
    }

    // ---------------------------------------------------------------------
    // v0.10.0: GET /v1/graph/nodes + GET /v1/graph/edges
    //
    // Paginated catalog reads. Both endpoints share auth + tenant +
    // cursor scaffolding from /v1/graph/expand, so tests focus on the
    // new surface: filter parsing, entity synthesis cap, cursor round-
    // trip, edge-type defaults (semantic excluded), and the semantic
    // 400 redirect to /v1/graph/neighbors.
    // ---------------------------------------------------------------------

    /// Sends a body-less request through `router` and returns the status,
    /// the response headers, and the body parsed as JSON. An empty body,
    /// or one that is not valid JSON, is reported as `Value::Null`.
    /// Used by the entity-cap header test.
    async fn call_with_headers(
        router: axum::Router,
        method: &str,
        uri: &str,
    ) -> (StatusCode, axum::http::HeaderMap, Value) {
        let request = Request::builder()
            .method(method)
            .uri(uri)
            .header("content-length", "0")
            .body(Body::empty())
            .unwrap();
        let response = router.oneshot(request).await.expect("oneshot");
        // Split once so status and headers can be taken by value.
        let (parts, payload) = response.into_parts();
        let raw = payload.collect().await.unwrap().to_bytes();
        let parsed: Option<Value> = if raw.is_empty() {
            None
        } else {
            serde_json::from_slice(&raw).ok()
        };
        (parts.status, parts.headers, parsed.unwrap_or(Value::Null))
    }

    /// With no `kind` filter, `/v1/graph/nodes` returns nodes of every
    /// kind present in the store: episode, document, chunk, cluster and
    /// entity.
    #[test]
    fn nodes_returns_all_kinds_when_no_filter() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        {
            // One row per backing table; the triple supplies the entities.
            let db = h.open_db();
            let rowid = seed_episode(&db, "aaaaaaaa-0000-7000-8000-000000000001", 100, "episode one");
            seed_document_row(&db, "doc-1", "doc one");
            seed_chunk_row(&db, "chunk-1", "doc-1", 0, "chunk one body");
            seed_cluster_row(&db, "cl-one", 200);
            seed_triple_row(&db, "t-one", "Alice", "knows", "Bob", Some(rowid));
        }
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/nodes", None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let nodes = body["nodes"].as_array().unwrap();
        let mut kinds = std::collections::HashSet::new();
        for node in nodes {
            kinds.insert(node["kind"].as_str().unwrap());
        }
        for expected in ["episode", "document", "chunk", "cluster", "entity"] {
            assert!(
                kinds.contains(expected),
                "expected {expected} kind in response: {body}"
            );
        }
        h.shutdown(&runtime);
    }

    /// `kind=episode` restricts `/v1/graph/nodes` to episode nodes only,
    /// even when documents and clusters are also stored.
    #[test]
    fn nodes_filter_by_single_kind() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        {
            let db = h.open_db();
            seed_episode(&db, "bbbbbbbb-0000-7000-8000-000000000001", 100, "ep");
            seed_document_row(&db, "doc-only", "d");
            seed_cluster_row(&db, "cl-only", 300);
        }
        let uri = "/v1/graph/nodes?kind=episode";
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let nodes = body["nodes"].as_array().unwrap();
        assert!(!nodes.is_empty(), "{body}");
        nodes.iter().for_each(|node| {
            assert_eq!(node["kind"], "episode", "kind filter must be exclusive: {body}");
        });
        h.shutdown(&runtime);
    }

    /// A comma-separated `kind` list selects the union of those kinds:
    /// `kind=episode,document` returns both and omits clusters.
    #[test]
    fn nodes_filter_by_multiple_kinds() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        {
            let db = h.open_db();
            seed_episode(&db, "cccccccc-0000-7000-8000-000000000001", 100, "ep");
            seed_document_row(&db, "doc-multi", "d");
            seed_cluster_row(&db, "cl-multi", 300);
        }
        let uri = "/v1/graph/nodes?kind=episode,document";
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let nodes = body["nodes"].as_array().unwrap();
        let mut kinds = std::collections::HashSet::new();
        for node in nodes {
            kinds.insert(node["kind"].as_str().unwrap());
        }
        for wanted in ["episode", "document"] {
            assert!(kinds.contains(wanted), "{body}");
        }
        let cluster_leaked = kinds.contains("cluster");
        assert!(!cluster_leaked, "cluster must be filtered out: {body}");
        h.shutdown(&runtime);
    }

    /// Entity nodes are capped at 200 per response: with more than 200
    /// entities available and `limit=500` requested, exactly 200 entity
    /// nodes come back and the `x-solo-entity-cap-reached` header is
    /// `true`.
    #[test]
    fn nodes_entity_synthesis_caps_at_200() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        {
            let db = h.open_db();
            // A single episode backs 250 triples. All share the subject
            // "Alice" and each has its own object, so the distinct
            // entities are Alice plus 250 objects, comfortably over 200.
            let rowid = seed_episode(&db, "dddddddd-0000-7000-8000-000000000001", 100, "ep");
            for i in 0..250 {
                let object = format!("Entity{i:03}");
                let triple_id = format!("t-cap-{i:03}");
                seed_triple_row(&db, &triple_id, "Alice", "knows", &object, Some(rowid));
            }
        }
        let uri = "/v1/graph/nodes?kind=entity&limit=500";
        let (status, headers, body) =
            runtime.block_on(call_with_headers(h.router.clone(), "GET", uri));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let nodes = body["nodes"].as_array().unwrap();
        assert_eq!(
            nodes.len(),
            200,
            "entity cap must be enforced at 200, got {}",
            nodes.len()
        );
        let cap_flag = headers
            .get("x-solo-entity-cap-reached")
            .and_then(|value| value.to_str().ok());
        assert_eq!(
            cap_flag,
            Some("true"),
            "cap-reached header missing: headers={headers:?}"
        );
        nodes
            .iter()
            .for_each(|node| assert_eq!(node["kind"], "entity"));
        h.shutdown(&runtime);
    }

    /// `since_ms` / `until_ms` bound the returned episodes by timestamp:
    /// of three episodes at 100, 500 and 1000, only the one at 500 falls
    /// inside the 400..600 window.
    #[test]
    fn nodes_since_until_filter_works() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        {
            let db = h.open_db();
            for (episode_id, ts_ms, label) in [
                ("eeeeeeee-0000-7000-8000-000000000001", 100, "early"),
                ("eeeeeeee-0000-7000-8000-000000000002", 500, "middle"),
                ("eeeeeeee-0000-7000-8000-000000000003", 1000, "late"),
            ] {
                seed_episode(&db, episode_id, ts_ms, label);
            }
        }
        let uri = "/v1/graph/nodes?kind=episode&since_ms=400&until_ms=600";
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let nodes = body["nodes"].as_array().unwrap();
        assert_eq!(nodes.len(), 1, "{body}");
        let only_hit = &nodes[0];
        assert_eq!(only_hit["id"], "ep:eeeeeeee-0000-7000-8000-000000000002");
        h.shutdown(&runtime);
    }

    /// Walks `/v1/graph/nodes` page by page (limit 50) over 150 seeded
    /// episodes, feeding each `next_cursor` back in. Checks that no page
    /// exceeds the limit, no id repeats, all 150 ids are seen, and the
    /// walk ends with no cursor outstanding.
    #[test]
    fn nodes_pagination_round_trip() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        {
            let db = h.open_db();
            for i in 0..150 {
                // Timestamps grow with i, which keeps the sort order
                // deterministic; the newest (highest i) comes first.
                let mid = format!("f0000000-0000-7000-8000-{i:012}");
                seed_episode(&db, &mid, 1_000 + i as i64, "page");
            }
        }
        let limit = 50u32;
        let mut seen = std::collections::HashSet::<String>::new();
        let mut next_cursor: Option<String> = None;
        // Four iterations allow one more request than the three full pages.
        for page_idx in 0..4 {
            let cursor_param = match next_cursor.as_deref() {
                Some(c) => format!("&cursor={c}"),
                None => String::new(),
            };
            let uri = format!("/v1/graph/nodes?kind=episode&limit={limit}{cursor_param}");
            let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
            assert_eq!(status, StatusCode::OK, "page {page_idx}: {body}");
            let nodes = body["nodes"].as_array().unwrap();
            let within_limit = nodes.len() <= limit as usize;
            assert!(within_limit, "page {page_idx} over-fetched: {body}");
            for n in nodes {
                let id = n["id"].as_str().unwrap().to_string();
                let first_sighting = seen.insert(id.clone());
                assert!(first_sighting, "duplicate id across pages: {id}");
            }
            next_cursor = body
                .get("next_cursor")
                .and_then(Value::as_str)
                .map(str::to_owned);
            if next_cursor.is_none() {
                break;
            }
        }
        assert_eq!(
            seen.len(),
            150,
            "expected 150 distinct ids across pages, got {}",
            seen.len()
        );
        assert!(
            next_cursor.is_none(),
            "cursor should be null after last page; got {next_cursor:?}"
        );
        h.shutdown(&runtime);
    }

    /// `/v1/graph/nodes` answers 404 when the `x-solo-tenant` header names
    /// a tenant that was never registered, even though the default tenant
    /// holds data.
    #[test]
    fn nodes_respects_tenant_scoping() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        {
            let db = h.open_db();
            seed_episode(&db, "11110000-0000-7000-8000-000000000001", 100, "tenant scope");
        }
        // The unknown tenant is expected to be refused by the tenant
        // extractor before the handler runs.
        let request = Request::builder()
            .method("GET")
            .uri("/v1/graph/nodes")
            .header("x-solo-tenant", "never-registered-tenant")
            .body(Body::empty())
            .unwrap();
        let router = h.router.clone();
        let status = runtime.block_on(async {
            let response = router.oneshot(request).await.expect("oneshot");
            let code = response.status();
            // Read the body to completion; its content is not inspected.
            let _drained = response.into_body().collect().await.unwrap().to_bytes();
            code
        });
        assert_eq!(status, StatusCode::NOT_FOUND);
        h.shutdown(&runtime);
    }

    /// With a bearer token configured, `/v1/graph/nodes` answers 401
    /// without credentials and 200 with the correct `Authorization`
    /// header.
    #[test]
    fn nodes_respects_auth_when_enabled() {
        let runtime = rt();
        let h = Harness::new_with_auth(&runtime, Some("nodes-secret".into()));
        let uri = "/v1/graph/nodes";

        let (anon_status, _) = runtime.block_on(call(h.router.clone(), "GET", uri, None));
        assert_eq!(
            anon_status,
            StatusCode::UNAUTHORIZED,
            "must reject unauthenticated request"
        );

        let (authed_status, _) = runtime.block_on(call_with_auth(
            h.router.clone(),
            "GET",
            uri,
            None,
            Some("Bearer nodes-secret"),
        ));
        assert_eq!(authed_status, StatusCode::OK, "must pass through with bearer");
        h.shutdown(&runtime);
    }

    /// Without auth configured, a plain request to `/v1/graph/nodes`
    /// succeeds and the body carries a `nodes` field.
    #[test]
    fn nodes_works_with_auth_none() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/nodes", None));
        assert_eq!(status, StatusCode::OK, "{body}");
        let has_nodes_field = body.get("nodes").is_some();
        assert!(has_nodes_field);
        h.shutdown(&runtime);
    }

    // --- /v1/graph/edges ---

    /// With no `type` filter, `/v1/graph/edges` returns triple,
    /// document_chunk and cluster_member edges, and never semantic ones.
    #[test]
    fn edges_returns_all_default_kinds() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        {
            // One edge of each default kind.
            let db = h.open_db();
            let memory_id = "22220000-0000-7000-8000-000000000001";
            let rowid = seed_episode(&db, memory_id, 100, "ep src");
            seed_triple_row(&db, "t-def", "Alice", "knows", "Bob", Some(rowid));
            seed_document_row(&db, "doc-e", "doc");
            seed_chunk_row(&db, "c-e", "doc-e", 0, "chunk");
            seed_cluster_row(&db, "cl-e", 200);
            seed_cluster_member(&db, "cl-e", memory_id);
        }
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "GET", "/v1/graph/edges", None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let edges = body["edges"].as_array().unwrap();
        let mut kinds = std::collections::HashSet::new();
        for edge in edges {
            kinds.insert(edge["kind"].as_str().unwrap());
        }
        for wanted in ["triple", "document_chunk", "cluster_member"] {
            assert!(kinds.contains(wanted), "{body}");
        }
        let semantic_present = kinds.contains("semantic");
        assert!(
            !semantic_present,
            "semantic is NOT in default response: {body}"
        );
        h.shutdown(&runtime);
    }

    /// Filtering `/v1/graph/edges` by `node_id` returns only the edges
    /// incident to that node: the three triples sourced from the target
    /// episode come back, the decoy episode's triple does not.
    #[test]
    fn edges_filter_by_node_id_finds_incident_edges() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let memory_id = "33330000-0000-7000-8000-000000000001";
        {
            let db = h.open_db();
            let rowid = seed_episode(&db, memory_id, 100, "ep multi-triple");
            for (triple_id, object) in [("t-a", "Bob"), ("t-b", "Carol"), ("t-c", "Dave")] {
                seed_triple_row(&db, triple_id, "Alice", "p", object, Some(rowid));
            }
            // A second episode with its own triple, which the filter must
            // leave out.
            let decoy_rowid =
                seed_episode(&db, "33330000-0000-7000-8000-000000000999", 200, "decoy");
            seed_triple_row(&db, "t-decoy", "Alice", "p", "Eve", Some(decoy_rowid));
        }
        let node_id = format!("ep:{memory_id}");
        let uri = format!(
            "/v1/graph/edges?type=triple&node_id={}",
            percent_encode_node_id(&node_id)
        );
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let edges = body["edges"].as_array().unwrap();
        assert_eq!(edges.len(), 3, "expected 3 incident edges: {body}");
        edges.iter().for_each(|edge| {
            assert_eq!(edge["source"], node_id);
            assert_eq!(edge["kind"], "triple");
        });
        h.shutdown(&runtime);
    }

    /// `type=triple` restricts `/v1/graph/edges` to triple edges, even
    /// when a document/chunk pair is also stored.
    #[test]
    fn edges_filter_by_type_works() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        {
            let db = h.open_db();
            let rowid = seed_episode(&db, "44440000-0000-7000-8000-000000000001", 100, "ep");
            seed_triple_row(&db, "t-only", "Alice", "p", "Bob", Some(rowid));
            // Document + chunk that the type filter must leave out.
            seed_document_row(&db, "doc-skip", "doc");
            seed_chunk_row(&db, "c-skip", "doc-skip", 0, "chunk");
        }
        let uri = "/v1/graph/edges?type=triple";
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", uri, None));
        assert_eq!(status, StatusCode::OK, "{body}");
        let edges = body["edges"].as_array().unwrap();
        assert!(!edges.is_empty(), "{body}");
        edges
            .iter()
            .for_each(|edge| assert_eq!(edge["kind"], "triple", "{body}"));
        h.shutdown(&runtime);
    }

    /// `type=semantic` is not served by `/v1/graph/edges`: the endpoint
    /// answers 400 and the error text points at `/v1/graph/neighbors`.
    #[test]
    fn edges_rejects_semantic_type_with_400() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let uri = "/v1/graph/edges?type=semantic";
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", uri, None));
        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
        // A missing or non-string `error` is treated as the empty string,
        // which then fails the check below.
        let err = body["error"].as_str().unwrap_or_default();
        let points_to_neighbors = err.contains("/v1/graph/neighbors");
        assert!(
            points_to_neighbors,
            "error must point to /v1/graph/neighbors: {body}"
        );
        h.shutdown(&runtime);
    }

    /// Walks `/v1/graph/edges?type=triple` page by page, feeding each
    /// response's `next_cursor` into the next request, and checks that the
    /// 60 seeded triples come back as 60 distinct edge ids with the cursor
    /// exhausted at the end.
    #[test]
    fn edges_pagination_round_trip() {
        // Page size sent as `limit=`; 60 edges at 25 per page is 3 pages.
        const PAGE_LIMIT: u32 = 25;
        // Upper bound on requests so a never-ending cursor cannot hang the
        // test; the final assertions catch that case instead.
        const MAX_PAGES: usize = 5;

        let runtime = rt();
        let h = Harness::new(&runtime);

        // One episode with 60 triples -> 60 triple edges.
        let conn = h.open_db();
        let episode_rowid = seed_episode(
            &conn,
            "55550000-0000-7000-8000-000000000001",
            100,
            "ep big",
        );
        for i in 0..60 {
            seed_triple_row(
                &conn,
                &format!("t-page-{i:03}"),
                "Alice",
                "p",
                &format!("Obj{i:03}"),
                Some(episode_rowid),
            );
        }
        drop(conn);

        let mut seen_ids = std::collections::HashSet::<String>::new();
        let mut cursor: Option<String> = None;
        for page_idx in 0..MAX_PAGES {
            // First page carries no cursor; later pages append the one the
            // previous response handed back.
            let mut uri = format!("/v1/graph/edges?type=triple&limit={PAGE_LIMIT}");
            if let Some(c) = &cursor {
                uri.push_str("&cursor=");
                uri.push_str(c);
            }

            let (status, body) =
                runtime.block_on(call(h.router.clone(), "GET", &uri, None));
            assert_eq!(status, StatusCode::OK, "page {page_idx}: {body}");

            for edge in body["edges"].as_array().unwrap() {
                let id = edge["id"].as_str().unwrap();
                assert!(seen_ids.insert(id.to_string()), "duplicate edge id: {id}");
            }

            cursor = body
                .get("next_cursor")
                .and_then(|v| v.as_str())
                .map(str::to_owned);
            if cursor.is_none() {
                break;
            }
        }

        assert_eq!(
            seen_ids.len(),
            60,
            "expected 60 distinct edges, got {}",
            seen_ids.len()
        );
        assert!(cursor.is_none(), "expected exhausted cursor");
        h.shutdown(&runtime);
    }

    /// A request to `/v1/graph/edges` carrying an `x-solo-tenant` header
    /// for a tenant that was never registered must answer 404, even though
    /// the default tenant's database holds a triple.
    #[test]
    fn edges_respects_tenant_scoping() {
        let runtime = rt();
        let h = Harness::new(&runtime);

        // Give the default tenant some data so a 404 cannot be explained
        // by an empty graph.
        let conn = h.open_db();
        let episode_rowid = seed_episode(
            &conn,
            "66660000-0000-7000-8000-000000000001",
            100,
            "ep",
        );
        seed_triple_row(&conn, "t-tenant", "Alice", "p", "Bob", Some(episode_rowid));
        drop(conn);

        // Built by hand because the request needs the extra tenant header.
        let request = Request::builder()
            .method("GET")
            .uri("/v1/graph/edges")
            .header("x-solo-tenant", "never-registered-tenant")
            .body(Body::empty())
            .unwrap();
        let router = h.router.clone();
        let status = runtime.block_on(async move {
            let response = router.oneshot(request).await.expect("oneshot");
            let status = response.status();
            // Drain the body so the response is read to completion; its
            // contents are not inspected.
            let _ = response.into_body().collect().await.unwrap().to_bytes();
            status
        });

        assert_eq!(status, StatusCode::NOT_FOUND);
        h.shutdown(&runtime);
    }

    /// With the harness built with the token `edges-secret`,
    /// `/v1/graph/edges` must answer 401 without an `Authorization` value
    /// and 200 with `Bearer edges-secret`.
    #[test]
    fn edges_respects_auth_when_enabled() {
        let runtime = rt();
        let h = Harness::new_with_auth(&runtime, Some("edges-secret".into()));
        let uri = "/v1/graph/edges";

        // No credentials at all.
        let (anonymous_status, _) =
            runtime.block_on(call(h.router.clone(), "GET", uri, None));
        assert_eq!(anonymous_status, StatusCode::UNAUTHORIZED);

        // Matching bearer token.
        let (authorized_status, _) = runtime.block_on(call_with_auth(
            h.router.clone(),
            "GET",
            uri,
            None,
            Some("Bearer edges-secret"),
        ));
        assert_eq!(authorized_status, StatusCode::OK);

        h.shutdown(&runtime);
    }

    // ---------------------------------------------------------------------
    // v0.10.0: GET /v1/graph/inspect/{id}
    //
    // Kind-discriminated full-record drill. Shares auth + tenant + node-id
    // prefix scaffolding with /v1/graph/expand and /v1/graph/{nodes,edges},
    // so tests focus on the new surface: per-kind full_text source +
    // triples_in/out shape + entity zero-triple 404 semantics + the
    // standard 400/404/auth/tenant cases.
    // ---------------------------------------------------------------------

    /// Builds the request path for `GET /v1/graph/inspect/{id}`.
    ///
    /// The node id is run through `percent_encode_node_id` first so the
    /// `:` separating prefix and id travels as `%3A` inside a single path
    /// segment; axum's `Path<String>` extractor percent-decodes it again
    /// on the receiving side.
    fn inspect_uri(node_id: &str) -> String {
        let encoded = percent_encode_node_id(node_id);
        format!("/v1/graph/inspect/{encoded}")
    }

    /// Inspecting an episode returns its content verbatim as `full_text`,
    /// an empty `triples_in`, and one `triples_out` edge per seeded triple,
    /// each running from the episode node to an `ent:` node.
    #[test]
    fn inspect_episode_returns_full_text_plus_triples_out() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let memory_id = "a1110000-0000-7000-8000-000000000001";
        let node_id = format!("ep:{memory_id}");
        let full_text = "Met Alice for coffee at the new place. She mentioned the project is on track but they're hitting issues with the deploy pipeline.";

        // One episode with three triples attached to it.
        let conn = h.open_db();
        let episode_rowid = seed_episode(&conn, memory_id, 1_715_625_600_000, full_text);
        for (triple_id, subject, predicate, object) in [
            ("t-ep-1", "user", "met_with", "Alice"),
            ("t-ep-2", "user", "discussed", "deploy_pipeline"),
            ("t-ep-3", "Alice", "works_on", "project"),
        ] {
            seed_triple_row(&conn, triple_id, subject, predicate, object, Some(episode_rowid));
        }
        drop(conn);

        let (status, body) = runtime.block_on(call(
            h.router.clone(),
            "GET",
            &inspect_uri(&node_id),
            None,
        ));
        assert_eq!(status, StatusCode::OK, "body: {body}");

        // Node header.
        assert_eq!(body["node"]["kind"], "episode");
        assert_eq!(body["node"]["id"], node_id);

        // Body text.
        assert_eq!(
            body["full_text"].as_str().unwrap(),
            full_text,
            "full_text must match episodes.content verbatim, untruncated"
        );

        // Edge lists.
        let triples_out = body["triples_out"].as_array().unwrap();
        assert_eq!(triples_out.len(), 3, "{body}");
        let triples_in = body["triples_in"].as_array().unwrap();
        assert!(triples_in.is_empty(), "episodes have no triples_in: {body}");

        // Per-edge shape: episode -> entity, with predicate and weight.
        for edge in triples_out {
            assert_eq!(edge["kind"], "triple");
            assert_eq!(edge["source"], node_id);
            let target = edge["target"].as_str().unwrap();
            assert!(target.starts_with("ent:"));
            assert!(edge["predicate"].as_str().is_some());
            assert!(edge["weight"].as_f64().is_some());
        }

        h.shutdown(&runtime);
    }

    /// An episode's `triples_in` must be empty even when a different
    /// episode in the same database carries triples. Per the original
    /// author's note this is structural in v0.10.0 P1.
    #[test]
    fn inspect_episode_triples_in_is_empty_for_v10p1() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let focal = "a2220000-0000-7000-8000-000000000001";
        let other = "a2220000-0000-7000-8000-000000000002";

        // The focal episode gets no triples of its own; the second episode
        // carries five `user did thing` triples so the triples table is
        // well populated.
        let conn = h.open_db();
        seed_episode(&conn, focal, 100, "focal episode body");
        let other_rowid = seed_episode(&conn, other, 200, "another episode");
        for i in 0..5 {
            seed_triple_row(
                &conn,
                &format!("t-other-{i}"),
                "user",
                "did",
                "thing",
                Some(other_rowid),
            );
        }
        drop(conn);

        let focal_uri = inspect_uri(&format!("ep:{focal}"));
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "GET", &focal_uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");

        let triples_in = body["triples_in"].as_array().unwrap();
        assert!(
            triples_in.is_empty(),
            "episode triples_in must be empty regardless of cross-episode entity references: {body}"
        );

        h.shutdown(&runtime);
    }

    /// Inspecting a document returns `full_text` equal to its three chunk
    /// bodies joined with a blank line (`"\n\n"`), in the order the chunks
    /// were seeded with indices 0, 1, 2, and empty triple lists.
    #[test]
    fn inspect_document_returns_full_text_concatenated_from_chunks() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let doc_id = "d3330000-0000-7000-8000-000000000001";

        // One document with three chunks at consecutive indices.
        let conn = h.open_db();
        seed_document_row(&conn, doc_id, "doc-title");
        for (chunk_id, index, text) in [
            ("ch-doc-1", 0, "First chunk body."),
            ("ch-doc-2", 1, "Second chunk body."),
            ("ch-doc-3", 2, "Third chunk body."),
        ] {
            seed_chunk_row(&conn, chunk_id, doc_id, index, text);
        }
        drop(conn);

        let doc_uri = inspect_uri(&format!("doc:{doc_id}"));
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "GET", &doc_uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        assert_eq!(body["node"]["kind"], "document");

        // Chunks appear in ascending index order, separated by "\n\n".
        let full_text = body["full_text"].as_str().unwrap();
        assert_eq!(
            full_text,
            "First chunk body.\n\nSecond chunk body.\n\nThird chunk body."
        );

        // Both triple lists must be present and empty.
        assert!(body["triples_in"].as_array().unwrap().is_empty());
        assert!(body["triples_out"].as_array().unwrap().is_empty());

        h.shutdown(&runtime);
    }

    /// Inspecting a chunk returns the chunk's own text as `full_text`,
    /// kind `chunk`, and empty triple lists.
    #[test]
    fn inspect_chunk_returns_text() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let chunk_body = "This is the body of the chunk being inspected.";

        // A chunk needs a host document row to belong to.
        let conn = h.open_db();
        seed_document_row(&conn, "doc-chunk-host", "host");
        seed_chunk_row(&conn, "chunk-inspect-target", "doc-chunk-host", 0, chunk_body);
        drop(conn);

        let chunk_uri = inspect_uri("chunk:chunk-inspect-target");
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "GET", &chunk_uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");

        assert_eq!(body["node"]["kind"], "chunk");
        assert_eq!(body["full_text"].as_str().unwrap(), chunk_body);
        assert!(body["triples_in"].as_array().unwrap().is_empty());
        assert!(body["triples_out"].as_array().unwrap().is_empty());

        h.shutdown(&runtime);
    }

    /// Inspecting a cluster returns a `full_text` that contains both the
    /// cluster id (used as its label here) and the seeded abstraction
    /// text, with a blank line somewhere in between.
    #[test]
    fn inspect_cluster_returns_label_and_abstraction() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let cluster_id = "cl-inspect-target";
        let abstraction_text = "Discussions about the deploy pipeline and on-call rotation.";

        // One cluster with a single abstraction attached.
        let conn = h.open_db();
        seed_cluster_row(&conn, cluster_id, 12345);
        seed_abstraction_row(&conn, "abs-1", cluster_id, abstraction_text);
        drop(conn);

        let cluster_uri = inspect_uri(&format!("cl:{cluster_id}"));
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "GET", &cluster_uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        assert_eq!(body["node"]["kind"], "cluster");

        let full_text = body["full_text"].as_str().unwrap();
        let has_label = full_text.contains(cluster_id);
        assert!(
            has_label,
            "full_text must include cluster label: {full_text}"
        );
        let has_abstraction = full_text.contains(abstraction_text);
        assert!(
            has_abstraction,
            "full_text must include abstraction text: {full_text}"
        );
        // Expected layout is "label\n\nabstraction": the blank line lets
        // the inspector renderer split the two parts.
        let has_separator = full_text.contains("\n\n");
        assert!(has_separator, "label and abstraction must be separated: {full_text}");

        h.shutdown(&runtime);
    }

    /// Inspecting an entity returns a null `full_text`, an empty
    /// `triples_in`, and every triple that mentions the entity (as subject
    /// or object) in `triples_out`, each with the entity as `source` and a
    /// different `ent:` node as `target`.
    #[test]
    fn inspect_entity_returns_triples_only() {
        let runtime = rt();
        let h = Harness::new(&runtime);

        // Five triples that reference Alice: three with Alice as subject,
        // two with Alice as object. Alice never sits on both ends.
        let conn = h.open_db();
        let host_rowid = seed_episode(
            &conn,
            "e5550000-0000-7000-8000-000000000001",
            100,
            "host episode",
        );
        for (triple_id, subject, predicate, object) in [
            ("t-ent-1", "Alice", "knows", "Bob"),
            ("t-ent-2", "Alice", "works_at", "Anthropic"),
            ("t-ent-3", "user", "met", "Alice"),
            ("t-ent-4", "Alice", "owns", "laptop"),
            ("t-ent-5", "Carol", "mentors", "Alice"),
        ] {
            seed_triple_row(&conn, triple_id, subject, predicate, object, Some(host_rowid));
        }
        drop(conn);

        let entity_uri = inspect_uri("ent:Alice");
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "GET", &entity_uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");

        assert_eq!(body["node"]["kind"], "entity");
        assert_eq!(body["node"]["id"], "ent:Alice");
        assert!(
            body["full_text"].is_null(),
            "entity full_text must be null (entities have no body): {body}"
        );

        let triples_out = body["triples_out"].as_array().unwrap();
        assert_eq!(triples_out.len(), 5, "{body}");
        assert!(body["triples_in"].as_array().unwrap().is_empty());

        for edge in triples_out {
            assert_eq!(edge["kind"], "triple");
            assert_eq!(edge["source"], "ent:Alice");
            // The counterpart is always another entity, never Alice
            // herself, given the fixture above.
            let target = edge["target"].as_str().unwrap();
            assert!(target.starts_with("ent:"));
            assert_ne!(edge["target"], "ent:Alice");
        }

        h.shutdown(&runtime);
    }

    /// Inspecting an entity that no triple references must answer 404
    /// with an error that mentions the entity.
    #[test]
    fn inspect_entity_with_zero_triples_returns_404() {
        let runtime = rt();
        let h = Harness::new(&runtime);

        // Populate the triples table with an unrelated Bob/Carol triple so
        // the 404 is about this entity, not about an empty table.
        let conn = h.open_db();
        let episode_rowid = seed_episode(
            &conn,
            "e6660000-0000-7000-8000-000000000001",
            100,
            "ep",
        );
        seed_triple_row(&conn, "t-other", "Bob", "knows", "Carol", Some(episode_rowid));
        drop(conn);

        let missing_uri = inspect_uri("ent:Nonexistent");
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "GET", &missing_uri, None));
        assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");

        // Either the entity name or the word "entity" is accepted.
        let message = body["error"].as_str().unwrap_or_default();
        let mentions_entity = message.contains("Nonexistent") || message.contains("entity");
        assert!(mentions_entity, "error must mention entity: {body}");

        h.shutdown(&runtime);
    }

    /// A syntactically valid `ep:<uuid>` id with no matching row must
    /// answer 404.
    #[test]
    fn inspect_404_on_missing_node() {
        let runtime = rt();
        let h = Harness::new(&runtime);

        // Nothing is seeded: the prefix and UUID shape are fine, the row
        // simply does not exist.
        let missing_uri = inspect_uri("ep:99999999-9999-7000-8000-000000000999");
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "GET", &missing_uri, None));
        assert_eq!(status, StatusCode::NOT_FOUND, "body: {body}");

        h.shutdown(&runtime);
    }

    /// A node id with an unknown prefix (`xyz:`) must answer 400 with an
    /// error that mentions the bad prefix.
    #[test]
    fn inspect_400_on_invalid_prefix() {
        let runtime = rt();
        let h = Harness::new(&runtime);

        let bad_uri = inspect_uri("xyz:foo");
        let (status, body) =
            runtime.block_on(call(h.router.clone(), "GET", &bad_uri, None));
        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");

        // Either the offending prefix or the word "prefix" is accepted.
        let message = body["error"].as_str().unwrap_or_default();
        let mentions_prefix = message.contains("xyz") || message.contains("prefix");
        assert!(mentions_prefix, "error must mention bad prefix: {body}");

        h.shutdown(&runtime);
    }

    /// The same inspect request answers 404 when sent with an
    /// `x-solo-tenant` header naming an unregistered tenant, and 200 when
    /// sent without the header (default tenant).
    #[test]
    fn inspect_respects_tenant_scoping() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let memory_id = "a7770000-0000-7000-8000-000000000001";

        let conn = h.open_db();
        seed_episode(&conn, memory_id, 100, "tenant scope");
        drop(conn);

        let uri = inspect_uri(&format!("ep:{memory_id}"));

        // Unknown tenant: per the original note, the tenant extractor
        // answers 404 before the handler runs.
        let request = Request::builder()
            .method("GET")
            .uri(uri.as_str())
            .header("x-solo-tenant", "never-registered-tenant")
            .body(Body::empty())
            .unwrap();
        let router = h.router.clone();
        let foreign_status = runtime.block_on(async move {
            let response = router.oneshot(request).await.expect("oneshot");
            let status = response.status();
            // Drain the body; its contents are not inspected.
            let _ = response.into_body().collect().await.unwrap().to_bytes();
            status
        });
        assert_eq!(foreign_status, StatusCode::NOT_FOUND);

        // Sanity check: the very same id resolves on the default tenant.
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::OK, "default tenant must resolve: {body}");

        h.shutdown(&runtime);
    }

    /// With the harness built with the token `inspect-secret`, an inspect
    /// request without credentials answers 401, while the same request
    /// with the right bearer token reaches the handler (observed as the
    /// handler's 404 for an unknown node).
    #[test]
    fn inspect_respects_auth_when_enabled() {
        let runtime = rt();
        let h = Harness::new_with_auth(&runtime, Some("inspect-secret".into()));
        // Well-formed id that is never seeded.
        let uri = inspect_uri("ep:99999999-9999-7000-8000-000000000999");

        // No bearer token -> rejected with 401.
        let (anonymous_status, _) =
            runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(anonymous_status, StatusCode::UNAUTHORIZED);

        // Valid bearer token + unknown node -> 404, which shows the
        // request got past authentication.
        let (authorized_status, _) = runtime.block_on(call_with_auth(
            h.router.clone(),
            "GET",
            &uri,
            None,
            Some("Bearer inspect-secret"),
        ));
        assert_eq!(authorized_status, StatusCode::NOT_FOUND);

        h.shutdown(&runtime);
    }

    // ---------------------------------------------------------------------
    // v0.10.0: GET /v1/graph/neighbors/{id}
    //
    // Unified explicit + HNSW-semantic neighbor surface for solo-web's
    // "show similar" overlay. Tests cover the kind dispatch (explicit /
    // semantic / both default), threshold filter, limit clamp, dedupe
    // rule, and the standard 400/404/auth/tenant gates.
    // ---------------------------------------------------------------------

    /// Builds the request URI for `GET /v1/graph/neighbors/{id}`.
    ///
    /// Each of `kind`, `threshold` and `limit` becomes a query parameter
    /// when it is `Some` and is left out when it is `None`; parameters are
    /// emitted in that order and joined with `&`. When all three are
    /// `None` the URI has no `?` at all. The node id goes through
    /// `percent_encode_node_id` so its `:` survives the path extractor.
    fn neighbors_uri(
        node_id: &str,
        kind: Option<&str>,
        threshold: Option<f32>,
        limit: Option<u32>,
    ) -> String {
        // An `Option` iterates over zero or one items, so chaining the
        // three yields exactly the parameters that were supplied.
        let query = kind
            .map(|k| format!("kind={k}"))
            .into_iter()
            .chain(threshold.map(|t| format!("threshold={t}")))
            .chain(limit.map(|l| format!("limit={l}")))
            .collect::<Vec<String>>()
            .join("&");

        let encoded = percent_encode_node_id(node_id);
        match query.as_str() {
            "" => format!("/v1/graph/neighbors/{encoded}"),
            qs => format!("/v1/graph/neighbors/{encoded}?{qs}"),
        }
    }

    /// 1. `?kind=explicit` must return structural edges only.
    ///
    /// The focal episode gets two triples (its explicit neighbors) and two
    /// further episodes are stored next to it so the semantic path would
    /// have candidates to offer. The response must be non-empty and must
    /// not contain a single edge of kind `semantic`.
    #[test]
    fn neighbors_explicit_only_returns_no_semantic_edges() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        runtime.block_on(async {
            // Episodes are created through `post_remember` rather than a
            // raw SQL seed; per the original note that routes them through
            // the writer-actor so they receive HNSW entries, which the
            // semantic path would surface if it were not filtered out.
            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
            for content in ["beta beta beta", "gamma gamma gamma"] {
                let _ = post_remember(h.router.clone(), content).await;
            }

            // `seed_triple_row` wants the episode's rowid, which the HTTP
            // response does not carry, so look it up on a side connection.
            {
                let conn = h.open_db();
                let focal_rowid: i64 = conn
                    .query_row(
                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
                        rusqlite::params![&focal],
                        |row| row.get(0),
                    )
                    .unwrap();
                for (triple_id, predicate, object) in
                    [("t-exp-1", "knows", "Bob"), ("t-exp-2", "owns", "laptop")]
                {
                    seed_triple_row(
                        &conn,
                        triple_id,
                        "Alice",
                        predicate,
                        object,
                        Some(focal_rowid),
                    );
                }
            }

            let uri = neighbors_uri(&format!("ep:{focal}"), Some("explicit"), None, None);
            let (status, body) = call(h.router.clone(), "GET", &uri, None).await;
            assert_eq!(status, StatusCode::OK, "body: {body}");

            let edges = body["edges"].as_array().unwrap();
            assert!(!edges.is_empty(), "expected explicit edges: {body}");
            edges.iter().for_each(|edge| {
                assert_ne!(
                    edge["kind"], "semantic",
                    "kind=explicit must drop semantic edges: {body}"
                );
            });
        });
        h.shutdown(&runtime);
    }

    /// 2. `?kind=semantic` returns only HNSW edges (no explicit).
    ///
    /// Inverse of test 1: the focal episode has an explicit triple and two
    /// sibling episodes stored through `post_remember`. With
    /// `threshold=0` the response must contain at least one edge, every
    /// edge must be of kind `semantic`, and every edge must carry a
    /// numeric `weight`.
    #[test]
    fn neighbors_semantic_only_returns_no_explicit_edges() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        runtime.block_on(async {
            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
            let _other1 = post_remember(h.router.clone(), "beta beta beta").await;
            let _other2 = post_remember(h.router.clone(), "gamma gamma gamma").await;
            // Attach one explicit triple to the focal episode so there is
            // something for the `kind=semantic` filter to drop.
            {
                let conn = h.open_db();
                let rowid: i64 = conn
                    .query_row(
                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
                        rusqlite::params![&focal],
                        |r| r.get(0),
                    )
                    .unwrap();
                seed_triple_row(&conn, "t-exp-1", "Alice", "knows", "Bob", Some(rowid));
            }
            // Threshold=0 so every HNSW hit clears the filter.
            let (status, body) = call(
                h.router.clone(),
                "GET",
                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.0), None),
                None,
            )
            .await;
            assert_eq!(status, StatusCode::OK, "body: {body}");
            let edges = body["edges"].as_array().unwrap();
            // Without this check an empty response would pass vacuously:
            // the loop below would never run and the test could not tell
            // "explicit edges filtered out" from "nothing returned at all".
            assert!(!edges.is_empty(), "expected semantic edges: {body}");
            for e in edges {
                assert_eq!(
                    e["kind"], "semantic",
                    "kind=semantic must drop explicit edges: {body}"
                );
                assert!(e["weight"].is_number(), "semantic edges carry weight: {body}");
            }
        });
        h.shutdown(&runtime);
    }

    /// 3. Leaving out `kind=` must return explicit and semantic edges
    /// together: the response has to contain at least one `triple` edge
    /// and at least one `semantic` edge.
    #[test]
    fn neighbors_both_default_returns_combined() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        runtime.block_on(async {
            // Focal episode plus one sibling for the semantic path to find.
            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
            let _ = post_remember(h.router.clone(), "beta beta beta").await;

            // One explicit triple hanging off the focal episode.
            {
                let conn = h.open_db();
                let focal_rowid: i64 = conn
                    .query_row(
                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
                        rusqlite::params![&focal],
                        |row| row.get(0),
                    )
                    .unwrap();
                seed_triple_row(&conn, "t-both-1", "Alice", "met", "Bob", Some(focal_rowid));
            }

            // No `kind` parameter -> the default, which is "both".
            // Threshold 0 lets semantic hits through the filter.
            let uri = neighbors_uri(&format!("ep:{focal}"), None, Some(0.0), None);
            let (status, body) = call(h.router.clone(), "GET", &uri, None).await;
            assert_eq!(status, StatusCode::OK, "body: {body}");

            // Collect the distinct edge kinds present in the response.
            let mut kinds = std::collections::HashSet::<&str>::new();
            for edge in body["edges"].as_array().unwrap() {
                kinds.insert(edge["kind"].as_str().unwrap());
            }
            assert!(
                kinds.contains("triple"),
                "expected at least one triple edge: {body}"
            );
            assert!(
                kinds.contains("semantic"),
                "expected at least one semantic edge: {body}"
            );
        });
        h.shutdown(&runtime);
    }

    /// 4. Dedupe rule. With `kind=both`, no `(source, target)` pair may
    /// appear on more than one returned edge.
    ///
    /// The rule under test is that when the explicit and the semantic path
    /// both produce an edge for the same pair, only one edge is returned.
    /// This test does not construct such a collision (see the note inside);
    /// it pins the observable invariant instead.
    #[test]
    fn neighbors_dedupes_semantic_when_explicit_exists() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        runtime.block_on(async {
            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
            // Why there is no hand-built collision (summary of the original
            // author's notes; not re-verified against the handler here):
            //   * explicit edges for an episode end at `ent:` nodes (via
            //     triples) or at a cluster (via cluster membership);
            //   * semantic edges only ever end at `ep:` / `chunk:` nodes.
            // Even a triple whose object text equals another episode's
            // memory id yields an `ent:<uuid>` target, which is a different
            // node from `ep:<uuid>`, so the two paths describe structurally
            // different relationships and cannot share a (source, target)
            // pair with the available fixtures. The brief flagged a real
            // collision as unlikely. The realistic check is therefore the
            // invariant itself: one edge per pair.
            let _ = post_remember(h.router.clone(), "beta beta beta").await;

            // One explicit triple on the focal episode.
            {
                let conn = h.open_db();
                let focal_rowid: i64 = conn
                    .query_row(
                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
                        rusqlite::params![&focal],
                        |row| row.get(0),
                    )
                    .unwrap();
                seed_triple_row(&conn, "t-dedupe-1", "Alice", "knows", "Bob", Some(focal_rowid));
            }

            let uri = neighbors_uri(&format!("ep:{focal}"), Some("both"), Some(0.0), None);
            let (status, body) = call(h.router.clone(), "GET", &uri, None).await;
            assert_eq!(status, StatusCode::OK, "body: {body}");

            // Tally how often each (source, target) pair occurs.
            let mut occurrences = std::collections::HashMap::<(&str, &str), usize>::new();
            for edge in body["edges"].as_array().unwrap() {
                let source = edge["source"].as_str().unwrap();
                let target = edge["target"].as_str().unwrap();
                *occurrences.entry((source, target)).or_default() += 1;
            }

            // Every pair must have been seen exactly once.
            occurrences.iter().for_each(|(pair, count)| {
                assert_eq!(
                    *count, 1,
                    "edge pair {pair:?} appears {count} times -- dedupe rule violated: {body}"
                );
            });
        });
        h.shutdown(&runtime);
    }

    /// 5. Threshold filter -- raising the threshold drops low-similarity
    /// semantic neighbors.
    ///
    /// Issues the same `kind=semantic` request twice, once with
    /// `threshold=0` and once with `threshold=0.99`, and checks that
    /// (a) the high-threshold response has no more edges than the
    /// low-threshold one and (b) every edge in the high-threshold response
    /// carries a numeric `weight` of at least 0.99.
    #[test]
    fn neighbors_threshold_filters_low_similarity() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        runtime.block_on(async {
            let focal = post_remember(h.router.clone(), "alpha alpha alpha").await;
            let _o1 = post_remember(h.router.clone(), "beta one").await;
            let _o2 = post_remember(h.router.clone(), "beta two").await;
            let _o3 = post_remember(h.router.clone(), "beta three").await;
            // Low threshold -- expect more semantic hits.
            let (status, low_body) = call(
                h.router.clone(),
                "GET",
                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.0), None),
                None,
            )
            .await;
            assert_eq!(status, StatusCode::OK, "body: {low_body}");
            let low_edge_count = low_body["edges"].as_array().unwrap().len();
            // High threshold -- expect fewer (or equal) semantic hits.
            let (status, high_body) = call(
                h.router.clone(),
                "GET",
                &neighbors_uri(&format!("ep:{focal}"), Some("semantic"), Some(0.99), None),
                None,
            )
            .await;
            assert_eq!(status, StatusCode::OK, "body: {high_body}");
            let high_edges = high_body["edges"].as_array().unwrap();
            let high_edge_count = high_edges.len();
            assert!(
                high_edge_count <= low_edge_count,
                "high-threshold ({high_edge_count}) must not exceed low-threshold ({low_edge_count}): low={low_body}, high={high_body}"
            );
            // Every surviving high-threshold edge must satisfy the filter.
            // A missing or non-numeric weight is a failure rather than a
            // silent skip: an edge without a weight cannot have been
            // checked against the threshold, so letting it through would
            // hide exactly the regression this test exists to catch.
            for e in high_edges {
                let w = e["weight"]
                    .as_f64()
                    .unwrap_or_else(|| panic!("semantic edge is missing a numeric weight: {e}"));
                assert!(
                    w >= 0.99,
                    "edge with weight {w} survived threshold=0.99: {e}"
                );
            }
        });
        h.shutdown(&runtime);
    }

    /// 6. An oversized `?limit=999` is silently capped at the family
    /// ceiling of 100 -- the same policy as `/v1/graph/expand`.
    #[test]
    fn neighbors_limit_clamped_at_100() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        // One cluster with 150 members: without the clamp the explicit
        // cluster_member path could return more than 100 edges.
        {
            let conn = h.open_db();
            seed_cluster_row(&conn, "cl-huge-n", 1000);
            (0..150).for_each(|i| {
                let mid = format!("99119911-1111-7000-8000-{:012}", i);
                seed_episode(&conn, &mid, 100 + i as i64, &format!("content {i}"));
                seed_cluster_member(&conn, "cl-huge-n", &mid);
            });
        }
        let uri = neighbors_uri("cl:cl-huge-n", Some("explicit"), None, Some(999));
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::OK, "body: {body}");
        let edges = body["edges"].as_array().unwrap();
        assert_eq!(
            edges.len(),
            100,
            "limit must be silently clamped to 100, got {}",
            edges.len()
        );
        h.shutdown(&runtime);
    }

    /// 7. A document focal node cannot be queried with `kind=semantic`:
    /// the request is answered with 400 and the error text must mention
    /// both `episode` and `chunk` (the supported kinds).
    #[test]
    fn neighbors_semantic_rejects_document_source() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let doc_id = "d-semrej-0000-7000-8000-000000000001";
        // The connection is only needed for seeding; it is dropped at the
        // end of this statement.
        seed_document_row(&h.open_db(), doc_id, "host");

        let uri = neighbors_uri(&format!("doc:{doc_id}"), Some("semantic"), None, None);
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");

        let err = body["error"].as_str().unwrap_or_default();
        let lists_supported_kinds = ["episode", "chunk"]
            .iter()
            .all(|needle| err.contains(*needle));
        assert!(
            lists_supported_kinds,
            "error must list supported kinds: {body}"
        );
        h.shutdown(&runtime);
    }

    /// 8. A cluster focal node is likewise rejected with 400 when
    /// `kind=semantic` is requested.
    #[test]
    fn neighbors_semantic_rejects_cluster_source() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let cluster_id = "cl-semrej-target";
        // Seed the cluster row; the connection is dropped right after.
        seed_cluster_row(&h.open_db(), cluster_id, 12345);

        let uri = neighbors_uri(&format!("cl:{cluster_id}"), Some("semantic"), None, None);
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
        h.shutdown(&runtime);
    }

    /// 9. An entity focal node yields explicit triple edges only. With
    /// no `kind` given the response must still be 200 and must not
    /// contain any non-triple edge (entities have no embeddings, so the
    /// semantic path is expected to be skipped rather than to error).
    #[test]
    fn neighbors_entity_returns_triples_only() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        runtime.block_on(async {
            // The host episode goes through the regular remember path so
            // it also lands in HNSW. HNSW state is irrelevant to an entity
            // lookup; it is present to show the semantic path is skipped
            // silently instead of failing.
            let host_mid = post_remember(h.router.clone(), "Alice and Bob talked").await;
            {
                let conn = h.open_db();
                let rowid: i64 = conn
                    .query_row(
                        "SELECT rowid FROM episodes WHERE memory_id = ?1",
                        rusqlite::params![&host_mid],
                        |r| r.get(0),
                    )
                    .unwrap();
                // Two triples with `Alice` as subject, both anchored to the
                // host episode's rowid.
                for (triple_id, predicate, object) in [
                    ("t-ent-n-1", "knows", "Bob"),
                    ("t-ent-n-2", "works_at", "Acme"),
                ] {
                    seed_triple_row(&conn, triple_id, "Alice", predicate, object, Some(rowid));
                }
            }
            let uri = neighbors_uri("ent:Alice", None, Some(0.0), None);
            let (status, body) = call(h.router.clone(), "GET", &uri, None).await;
            assert_eq!(status, StatusCode::OK, "body: {body}");
            let edges = body["edges"].as_array().unwrap();
            assert!(!edges.is_empty(), "expected explicit triples: {body}");
            edges.iter().for_each(|e| {
                assert_eq!(
                    e["kind"], "triple",
                    "entity focal must produce only triple edges: {body}"
                );
            });
        });
        h.shutdown(&runtime);
    }

    /// 10. Cross-tenant lookups are blocked: a request carrying an
    /// unregistered `x-solo-tenant` header gets 404 (per the extractor
    /// design, before the handler runs), while the same node id resolves
    /// on the default tenant.
    #[test]
    fn neighbors_respects_tenant_scoping() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let memory_id = "a8880000-0000-7000-8000-000000000001";
        seed_episode(&h.open_db(), memory_id, 100, "tenant scope");
        let node = format!("ep:{memory_id}");

        // Wrong tenant header -> 404. The request is built by hand because
        // it needs the extra tenant header.
        let router = h.router.clone();
        let status = runtime.block_on(async {
            let req = Request::builder()
                .method("GET")
                .uri(neighbors_uri(&node, Some("explicit"), None, None))
                .header("x-solo-tenant", "never-registered-tenant-n")
                .body(Body::empty())
                .unwrap();
            let resp = router.oneshot(req).await.expect("oneshot");
            let status = resp.status();
            // Read the body to completion; its content is not inspected.
            let _ = resp.into_body().collect().await.unwrap().to_bytes();
            status
        });
        assert_eq!(status, StatusCode::NOT_FOUND);

        // Sanity: same id resolves on default tenant.
        let uri = neighbors_uri(&node, Some("explicit"), None, None);
        let (status, body) = runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(status, StatusCode::OK, "default tenant must resolve: {body}");
        h.shutdown(&runtime);
    }

    /// 11. Bearer-auth gate: without a token the request is refused with
    /// 401; with the valid token the same request for an unknown node
    /// gets 404, i.e. auth passed and the lookup itself ran.
    #[test]
    fn neighbors_respects_auth_when_enabled() {
        let runtime = rt();
        let h = Harness::new_with_auth(&runtime, Some("neighbors-secret".into()));
        // Both requests target the same, never-seeded node.
        let uri = neighbors_uri(
            "ep:99999999-9999-7000-8000-000000000999",
            Some("explicit"),
            None,
            None,
        );

        // Missing Authorization -> 401.
        let (anonymous_status, _) =
            runtime.block_on(call(h.router.clone(), "GET", &uri, None));
        assert_eq!(anonymous_status, StatusCode::UNAUTHORIZED);

        // Valid bearer + unknown node -> 404 from the handler.
        let (authorized_status, _) = runtime.block_on(call_with_auth(
            h.router.clone(),
            "GET",
            &uri,
            None,
            Some("Bearer neighbors-secret"),
        ));
        assert_eq!(authorized_status, StatusCode::NOT_FOUND);
        h.shutdown(&runtime);
    }

    // ---------------------------------------------------------------------
    // v0.10.0: GET /v1/graph/stream — SSE invalidation feed
    //
    // Driving SSE through axum's in-process router (`oneshot`) requires
    // reading the response body as a stream of frames and parsing each
    // chunk against the SSE wire format (`event: NAME\ndata: JSON\n\n`).
    // The `read_one_sse_event` helper below does that incrementally so
    // tests don't have to wait for the stream to close (which would
    // never happen — the SSE loop runs until the client drops).
    // ---------------------------------------------------------------------

    /// One parsed SSE event: the `event:` field plus the `data:` payload
    /// re-parsed as JSON. Empty / comment-only frames are filtered out
    /// by the parser; callers only see real events.
    #[derive(Debug, Clone)]
    struct ParsedSseEvent {
        /// Value of the block's `event:` field, trimmed of surrounding
        /// whitespace (e.g. `init`, `invalidate`, `heartbeat`).
        event: String,
        /// The block's `data:` payload decoded as JSON.
        data: Value,
    }

    /// Read frames off the SSE body until ONE complete event parses, then
    /// return it.
    ///
    /// Returns `None` when `timeout` elapses first, when the body ends,
    /// or when reading a frame fails. The timeout keeps red-test feedback
    /// fast instead of hanging on a stream that never closes.
    ///
    /// Received bytes are buffered raw and decoded one complete block
    /// (terminated by a blank line) at a time, so a multi-byte UTF-8
    /// character that happens to be split across two frames is decoded
    /// intact rather than replaced with U+FFFD.
    ///
    /// Caveat: the buffer lives only for the duration of this call. Any
    /// bytes received after the returned event in the same frame are
    /// discarded, so this helper assumes at most one event per frame.
    async fn read_one_sse_event(
        body: &mut axum::body::Body,
        timeout: std::time::Duration,
    ) -> Option<ParsedSseEvent> {
        use http_body_util::BodyExt;
        let mut buf: Vec<u8> = Vec::new();
        let start = std::time::Instant::now();
        loop {
            // Budget is shared across all frames read by this call.
            let elapsed = start.elapsed();
            if elapsed >= timeout {
                return None;
            }
            let remaining = timeout - elapsed;
            let frame = match tokio::time::timeout(remaining, body.frame()).await {
                Ok(Some(Ok(frame))) => frame,
                // Timed out, stream ended, or the body reported an error.
                Ok(Some(Err(_))) | Ok(None) | Err(_) => return None,
            };
            // Non-data frames (trailers) carry no SSE bytes; skip them.
            if let Ok(data) = frame.into_data() {
                buf.extend_from_slice(&data);
                // Peel off every complete block (blank-line terminated).
                // Blocks that do not parse as a real event -- comments,
                // keep-alives -- are dropped and scanning continues.
                while let Some(idx) = buf.windows(2).position(|w| w == b"\n\n") {
                    let block: Vec<u8> = buf.drain(..idx + 2).collect();
                    if let Some(parsed) = parse_sse_block(&String::from_utf8_lossy(&block)) {
                        return Some(parsed);
                    }
                }
            }
        }
    }

    /// Parse one SSE block (the raw text of a single event, up to its
    /// terminating blank line).
    ///
    /// Returns `None` when the block has no `event:` field, has no
    /// `data:` field (which covers comment-only blocks whose lines start
    /// with `:`), or when the data payload is not valid JSON.
    ///
    /// When a block carries several `data:` lines they are joined with
    /// `\n`, as the SSE wire format prescribes, so a payload that was
    /// emitted across multiple lines (e.g. pretty-printed JSON) is
    /// reassembled before it is parsed. If `event:` appears more than
    /// once, the last occurrence wins.
    fn parse_sse_block(block: &str) -> Option<ParsedSseEvent> {
        let mut event: Option<String> = None;
        let mut data_lines: Vec<&str> = Vec::new();
        for line in block.lines() {
            if let Some(rest) = line.strip_prefix("event:") {
                event = Some(rest.trim().to_string());
            } else if let Some(rest) = line.strip_prefix("data:") {
                // Line breaks always fall outside JSON string literals, so
                // trimming each line cannot alter the decoded value.
                data_lines.push(rest.trim());
            }
        }
        let event = event?;
        if data_lines.is_empty() {
            return None;
        }
        let data = serde_json::from_str(&data_lines.join("\n")).ok()?;
        Some(ParsedSseEvent { event, data })
    }

    /// Issue `GET /v1/graph/stream` against `router` and return the
    /// response status together with the still-open body, so the caller
    /// can read SSE frames from it incrementally.
    ///
    /// * `auth` -- when set, sent verbatim as the `authorization` header.
    /// * `tenant` -- when set, sent as the `x-solo-tenant` header.
    ///
    /// Nothing is validated here: neither the status nor the response
    /// headers are checked. Callers assert on the returned status
    /// themselves, which lets the same helper drive the 401 / 404 cases.
    ///
    /// Panics if the request cannot be built or the router call fails.
    async fn open_sse_stream_inner(
        router: axum::Router,
        auth: Option<&str>,
        tenant: Option<&str>,
    ) -> (StatusCode, axum::body::Body) {
        let mut builder = Request::builder()
            .method("GET")
            .uri("/v1/graph/stream");
        if let Some(a) = auth {
            builder = builder.header("authorization", a);
        }
        if let Some(t) = tenant {
            builder = builder.header("x-solo-tenant", t);
        }
        let req = builder
            .header("content-length", "0")
            .body(Body::empty())
            .unwrap();
        let resp = router.oneshot(req).await.expect("oneshot");
        (resp.status(), resp.into_body())
    }

    /// 1. The first event on a freshly opened stream is `init`, carrying
    /// `connected: true`, the tenant id and a numeric `ts_ms`.
    #[test]
    fn stream_emits_init_event_on_connect() {
        use std::time::Duration;

        let runtime = rt();
        let h = Harness::new(&runtime);
        let router = h.router.clone();
        runtime.block_on(async {
            let (status, mut body) = open_sse_stream_inner(router, None, None).await;
            assert_eq!(status, StatusCode::OK);

            let init = read_one_sse_event(&mut body, Duration::from_secs(2))
                .await
                .expect("must receive init event within 2s");
            assert_eq!(init.event, "init");

            let payload = &init.data;
            assert_eq!(payload["connected"].as_bool(), Some(true));
            assert_eq!(payload["tenant_id"].as_str(), Some("default"));
            assert!(payload["ts_ms"].is_number());
        });
        h.shutdown(&runtime);
    }

    /// 2. An `InvalidateEvent` sent on the broadcast channel shows up on
    /// the stream as an `invalidate` SSE event with the same `reason`,
    /// `tenant_id` and `kind`.
    #[test]
    fn stream_emits_invalidate_after_writer_event() {
        use std::time::Duration;

        let runtime = rt();
        let h = Harness::new(&runtime);
        let router = h.router.clone();
        let sender = h.invalidate_sender();
        runtime.block_on(async {
            let (status, mut body) = open_sse_stream_inner(router, None, None).await;
            assert_eq!(status, StatusCode::OK);

            // Consume the leading init event so the next read sees the
            // invalidate.
            let init = read_one_sse_event(&mut body, Duration::from_secs(2))
                .await
                .unwrap();
            assert_eq!(init.event, "init");

            // Publish the kind of event the writer actor would emit.
            let event = InvalidateEvent {
                reason: "memory.remember".to_owned(),
                tenant_id: "default".to_owned(),
                ts_ms: 1_715_625_600_000,
                kind: "episode".to_owned(),
            };
            sender
                .send(event)
                .expect("must have at least one subscriber");

            // It must come out the other end of the SSE handler.
            let ev = read_one_sse_event(&mut body, Duration::from_secs(2))
                .await
                .expect("invalidate event must arrive within 2s");
            assert_eq!(ev.event, "invalidate");
            let payload = &ev.data;
            assert_eq!(payload["reason"].as_str(), Some("memory.remember"));
            assert_eq!(payload["tenant_id"].as_str(), Some("default"));
            assert_eq!(payload["kind"].as_str(), Some("episode"));
        });
        h.shutdown(&runtime);
    }

    /// 3. Every `(reason, kind)` pair listed below, when broadcast,
    /// surfaces on the stream as an `invalidate` event carrying that
    /// same pair.
    #[test]
    fn stream_emits_invalidate_for_each_writer_command() {
        use std::time::Duration;

        let runtime = rt();
        let h = Harness::new(&runtime);
        let router = h.router.clone();
        let sender = h.invalidate_sender();
        // (reason, kind) as sent on the broadcast channel.
        let cases = [
            ("memory.remember", "episode"),
            ("memory.forget", "episode"),
            ("memory.consolidate", "cluster"),
            ("memory.ingest_document", "document"),
            ("memory.forget_document", "document"),
            ("memory.triples_extract", "cluster"),
            ("memory.reembed", "episode"),
            ("gdpr.forget_user", "tenant"),
        ];
        runtime.block_on(async {
            let (status, mut body) = open_sse_stream_inner(router, None, None).await;
            assert_eq!(status, StatusCode::OK);
            // Skip past the init event; it is not under test here.
            let _init = read_one_sse_event(&mut body, Duration::from_secs(2))
                .await
                .unwrap();

            // Send one event at a time and read it back before sending
            // the next, so each read pairs with exactly one send.
            for (reason, kind) in cases {
                let event = InvalidateEvent {
                    reason: reason.to_owned(),
                    tenant_id: "default".to_owned(),
                    ts_ms: 1_715_625_600_000,
                    kind: kind.to_owned(),
                };
                sender.send(event).unwrap();

                let ev = match read_one_sse_event(&mut body, Duration::from_secs(2)).await {
                    Some(ev) => ev,
                    None => panic!("must receive event for {reason}"),
                };
                assert_eq!(ev.event, "invalidate");
                assert_eq!(
                    ev.data["reason"].as_str(),
                    Some(reason),
                    "reason mismatch"
                );
                assert_eq!(ev.data["kind"].as_str(), Some(kind), "kind mismatch");
            }
        });
        h.shutdown(&runtime);
    }

    /// 4. With no invalidates in flight, a `heartbeat` event follows the
    /// `init` event after the configured interval.
    ///
    /// The test calls `build_invalidate_stream` directly with a heartbeat
    /// argument of `1` (one second; the public handler uses 30s in
    /// prod), wraps the stream in an `Sse` response and reads the
    /// resulting body with the same `read_one_sse_event` helper the
    /// HTTP-layer tests use. That covers the public Event -> body byte
    /// path without touching `Event::finalize` (which is private).
    #[test]
    fn stream_emits_heartbeat_when_no_events() {
        use std::time::Duration;

        let runtime = rt();
        let h = Harness::new(&runtime);
        let sender = h.invalidate_sender();
        runtime.block_on(async {
            // Subscribe straight on the broadcast sender; the stream under
            // test is built from this receiver and a 1-second heartbeat
            // instead of going through the router.
            let stream = build_invalidate_stream(sender.subscribe(), "default".to_string(), 1);
            // Going through `IntoResponse` yields real on-the-wire SSE
            // bytes that `read_one_sse_event` can parse.
            let mut body = Sse::new(stream).into_response().into_body();

            // First event must be `init`.
            let first = read_one_sse_event(&mut body, Duration::from_secs(2))
                .await
                .expect("init event must arrive");
            assert_eq!(first.event, "init");

            // Nothing was sent, so the next event must be the heartbeat.
            // The 3s window leaves slack over the ~1s interval for
            // runtime jitter.
            let second = read_one_sse_event(&mut body, Duration::from_secs(3))
                .await
                .expect("heartbeat event must arrive within 3s");
            assert_eq!(second.event, "heartbeat");
            assert!(second.data["ts_ms"].is_number());
        });
        h.shutdown(&runtime);
    }

    /// 5. Several subscribers (three here) connected to the same tenant
    /// each receive every invalidate.
    #[test]
    fn stream_concurrent_subscribers_same_tenant() {
        use std::time::Duration;

        const SUBSCRIBERS: usize = 3;

        let runtime = rt();
        let h = Harness::new(&runtime);
        let sender = h.invalidate_sender();
        runtime.block_on(async {
            // Open all subscribers first, then check their statuses, so
            // every stream exists before any assertion can abort the test.
            let mut opened = Vec::with_capacity(SUBSCRIBERS);
            for _ in 0..SUBSCRIBERS {
                opened.push(open_sse_stream_inner(h.router.clone(), None, None).await);
            }
            let mut bodies = Vec::with_capacity(SUBSCRIBERS);
            for (status, body) in opened {
                assert_eq!(status, StatusCode::OK);
                bodies.push(body);
            }

            // Drain init events from each.
            for body in bodies.iter_mut() {
                let ev = read_one_sse_event(body, Duration::from_secs(2))
                    .await
                    .unwrap();
                assert_eq!(ev.event, "init");
            }

            // Receiver count should be at least 3 now.
            assert!(
                sender.receiver_count() >= SUBSCRIBERS,
                "expected ≥3 subscribers, got {}",
                sender.receiver_count()
            );

            // Fire one invalidate.
            sender
                .send(InvalidateEvent {
                    reason: "memory.remember".to_string(),
                    tenant_id: "default".to_string(),
                    ts_ms: 1_715_625_600_000,
                    kind: "episode".to_string(),
                })
                .expect("send must succeed");

            // Every subscriber must observe that single invalidate.
            for body in bodies.iter_mut() {
                let ev = read_one_sse_event(body, Duration::from_secs(2))
                    .await
                    .unwrap();
                assert_eq!(ev.event, "invalidate");
                assert_eq!(ev.data["reason"].as_str(), Some("memory.remember"));
            }
        });
        h.shutdown(&runtime);
    }

    /// 6. Graceful cleanup: the broadcast receiver count rises while an
    /// SSE client is connected and falls back to (at most) its previous
    /// value once the client's body is dropped.
    #[test]
    fn stream_handles_client_disconnect_gracefully() {
        use std::time::Duration;

        let runtime = rt();
        let h = Harness::new(&runtime);
        let router = h.router.clone();
        let sender = h.invalidate_sender();
        let before = sender.receiver_count();
        runtime.block_on(async {
            let (status, mut body) = open_sse_stream_inner(router, None, None).await;
            assert_eq!(status, StatusCode::OK);
            // Reading the init event guarantees the stream is fully live.
            let _init = read_one_sse_event(&mut body, Duration::from_secs(2))
                .await
                .unwrap();
            let during = sender.receiver_count();
            assert!(
                during > before,
                "subscriber count must increase while stream is live (before={before}, during={during})"
            );
            // Dropping the body stands in for the client closing the
            // connection: the stream goes away and its Receiver with it.
            drop(body);
        });
        // Give tokio a moment to finish any pending cleanup.
        runtime.block_on(async {
            tokio::time::sleep(Duration::from_millis(50)).await;
        });
        let after = sender.receiver_count();
        assert!(
            after <= before,
            "subscriber count must drop back after disconnect (before={before}, after={after})"
        );
        h.shutdown(&runtime);
    }

    /// 7. Bearer-auth gate: opening the stream without a token is
    /// refused with 401.
    #[test]
    fn stream_respects_auth_when_enabled() {
        let runtime = rt();
        let h = Harness::new_with_auth(&runtime, Some("stream-secret".into()));
        let router = h.router.clone();
        // Only the status matters; the body is dropped unread.
        let (status, _body) = runtime.block_on(async {
            open_sse_stream_inner(router, None, None).await
        });
        assert_eq!(status, StatusCode::UNAUTHORIZED);
        h.shutdown(&runtime);
    }

    /// 8. With no auth configured (the loopback default) an anonymous
    /// client can open the stream and receives the `init` event.
    #[test]
    fn stream_works_with_auth_none() {
        use std::time::Duration;

        let runtime = rt();
        let h = Harness::new(&runtime);
        let router = h.router.clone();
        runtime.block_on(async {
            let (status, mut body) = open_sse_stream_inner(router, None, None).await;
            assert_eq!(status, StatusCode::OK);
            let first = read_one_sse_event(&mut body, Duration::from_secs(2))
                .await
                .expect("must receive init event");
            assert_eq!(first.event, "init");
        });
        h.shutdown(&runtime);
    }

    /// 9. Bearer-auth gate: with the configured token the stream opens
    /// and delivers an `init` event for the default tenant.
    #[test]
    fn stream_respects_auth_accepts_valid_token() {
        use std::time::Duration;

        let runtime = rt();
        let h = Harness::new_with_auth(&runtime, Some("stream-secret".into()));
        let router = h.router.clone();
        runtime.block_on(async {
            let authorization = Some("Bearer stream-secret");
            let (status, mut body) = open_sse_stream_inner(router, authorization, None).await;
            assert_eq!(status, StatusCode::OK);
            let first = read_one_sse_event(&mut body, Duration::from_secs(2))
                .await
                .expect("must receive init event with valid bearer");
            assert_eq!(first.event, "init");
            assert_eq!(first.data["tenant_id"].as_str(), Some("default"));
        });
        h.shutdown(&runtime);
    }

    /// 10. A stream request naming an unregistered tenant is answered
    /// with 404 -- by design the TenantExtractor rejects it before the
    /// stream handler is reached.
    #[test]
    fn stream_respects_tenant_scoping() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let router = h.router.clone();
        let (status, _body) = runtime.block_on(async {
            open_sse_stream_inner(router, None, Some("never-registered-tenant-x")).await
        });
        // The single-tenant test registry returns NotFound from
        // get_or_open for a tenant that isn't cached, and the
        // TenantExtractor maps that to 404.
        assert_eq!(status, StatusCode::NOT_FOUND);
        h.shutdown(&runtime);
    }

    // -----------------------------------------------------------------
    // /v1/tenants — principal-scoped tenant list (v0.10.0)
    //
    // Seeds the harness's in-memory tenants_index stub via
    // `harness.registry.with_index(|idx| idx.register(...))` to drive
    // the read-only list endpoint. The default tenant from the
    // harness's HashMap is NOT in the index stub by construction (the
    // `for_tests_with_single_tenant` factory only wires the cached
    // HashMap entry; the index starts empty after migrations), so each
    // test that wants the default tenant listed registers it
    // explicitly. This keeps the test setup explicit about what's
    // visible to `list_active` versus what's open in memory.
    // -----------------------------------------------------------------

    /// Register three tenants (`alice`, `bob`, `default`) in the
    /// registry's index and return their ids in registration order --
    /// which is also the order `list_active` is expected to report them
    /// in (ORDER BY created_at_ms ASC).
    ///
    /// Each tenant is registered with `<id>.db` as its path and
    /// `<id> tenant` as its display name. Panics if an id is rejected or
    /// registration fails.
    async fn seed_three_tenants(registry: &TenantRegistry) -> Vec<String> {
        use solo_core::TenantId as TenantIdT;
        let ids = ["alice", "bob", "default"];
        for id in ids {
            let tid = TenantIdT::new(id).unwrap();
            let db_file = format!("{id}.db");
            let display_name = format!("{id} tenant");
            registry
                .with_index(|idx| {
                    idx.register(&tid, &db_file, Some(&display_name))
                        .unwrap();
                })
                .await;
            // Keep created_at_ms distinct so the ASC sort is
            // deterministic -- the index uses `chrono::Utc::now()` per row
            // and sequential inserts can land in the same ms on fast
            // hardware.
            tokio::time::sleep(std::time::Duration::from_millis(2)).await;
        }
        // Registration order equals the `created_at_ms ASC, tenant_id ASC`
        // order `TenantsIndex::list` returns, thanks to the 2ms gaps.
        ids.iter().map(|id| id.to_string()).collect()
    }

    /// 1. With `AuthConfig::None`, the handler returns every tenant
    ///    visible in the registry -- same scope as `solo tenants list`.
    ///    Exercises the "no principal" branch of the visibility filter.
    ///
    ///    The expected ids and their order come from
    ///    `seed_three_tenants`, so the assertion follows the seeding
    ///    helper instead of repeating its list.
    #[test]
    fn tenants_returns_all_when_auth_none() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let r = h.router.clone();
        runtime.block_on(async {
            let expected = seed_three_tenants(&h.registry).await;
            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
            assert_eq!(status, StatusCode::OK, "got body: {body}");
            let arr = body
                .get("tenants")
                .and_then(|v| v.as_array())
                .expect("tenants array");
            assert_eq!(arr.len(), expected.len(), "got body: {body}");
            // Entries without a string `id` are dropped here; the equality
            // check below then fails on the shorter list.
            let ids: Vec<&str> =
                arr.iter().filter_map(|t| t["id"].as_str()).collect();
            assert_eq!(ids, expected, "got body: {body}");
        });
        h.shutdown(&runtime);
    }

    /// 2. Under Bearer auth (single-principal mode) the token holder is
    ///    treated as the daemon operator and sees every tenant.
    ///    Exercises the bearer branch of the visibility filter.
    #[test]
    fn tenants_returns_all_when_bearer_auth() {
        let runtime = rt();
        let h = Harness::new_with_auth(&runtime, Some("tlist-secret".into()));
        let router = h.router.clone();
        runtime.block_on(async {
            seed_three_tenants(&h.registry).await;
            let authorization = Some("Bearer tlist-secret");
            let (status, body) =
                call_with_auth(router, "GET", "/v1/tenants", None, authorization).await;
            assert_eq!(status, StatusCode::OK, "got body: {body}");
            let listed = body["tenants"].as_array().expect("tenants array").len();
            assert_eq!(listed, 3, "bearer must see all tenants");
        });
        h.shutdown(&runtime);
    }

    /// 3. Under OIDC, a principal whose token carries the tenant claim
    ///    `alice` is shown ONLY alice -- neither bob nor default.
    ///    Exercises the OIDC branch of the visibility filter.
    #[test]
    fn tenants_filters_to_principal_claim_when_oidc() {
        let runtime = rt();
        let (fake_server, discovery_url, secret, kid) =
            runtime.block_on(async { spin_fake_idp().await });
        let server_uri = fake_server.uri();
        // Hold the fake IdP until the end of the test.
        let _server_guard = fake_server;

        let h = Harness::new_with_auth_config(
            &runtime,
            Some(crate::auth::AuthConfig::Oidc {
                discovery_url,
                audience: "tlist-audience".to_string(),
                tenant_claim_name: "solo_tenant".to_string(),
            }),
        );
        let router = h.router.clone();

        runtime.block_on(async {
            seed_three_tenants(&h.registry).await;
            let token = mint_idp_token(&server_uri, kid, &secret, "alice", "tlist-audience");
            let authorization = format!("Bearer {token}");
            let (status, body) = call_with_auth(
                router,
                "GET",
                "/v1/tenants",
                None,
                Some(authorization.as_str()),
            )
            .await;
            assert_eq!(status, StatusCode::OK, "got body: {body}");
            let arr = body["tenants"].as_array().expect("tenants array");
            assert_eq!(arr.len(), 1, "OIDC alice must see exactly one tenant");
            assert_eq!(arr[0]["id"].as_str(), Some("alice"));
        });
        h.shutdown(&runtime);
    }

    /// 4. Under OIDC, a tenant claim that matches no registered tenant
    ///    yields `200 OK` with `tenants: []` -- NOT 404. A distinct
    ///    status code would act as a side-channel telling an OIDC
    ///    principal whether tenants it cannot see exist.
    #[test]
    fn tenants_returns_empty_when_oidc_claim_unmatched() {
        let runtime = rt();
        let (fake_server, discovery_url, secret, kid) =
            runtime.block_on(async { spin_fake_idp().await });
        let server_uri = fake_server.uri();
        // Hold the fake IdP until the end of the test.
        let _server_guard = fake_server;

        let h = Harness::new_with_auth_config(
            &runtime,
            Some(crate::auth::AuthConfig::Oidc {
                discovery_url,
                audience: "tlist-audience".to_string(),
                tenant_claim_name: "solo_tenant".to_string(),
            }),
        );
        let router = h.router.clone();

        runtime.block_on(async {
            seed_three_tenants(&h.registry).await;
            // The claimed tenant is a well-formed TenantId (so it passes
            // the middleware) that was never registered in the index.
            let token = mint_idp_token(
                &server_uri,
                kid,
                &secret,
                "nonexistent",
                "tlist-audience",
            );
            let authorization = format!("Bearer {token}");
            let (status, body) = call_with_auth(
                router,
                "GET",
                "/v1/tenants",
                None,
                Some(authorization.as_str()),
            )
            .await;
            assert_eq!(
                status,
                StatusCode::OK,
                "must be 200 OK, not 404 — don't leak tenant existence: {body}"
            );
            let listed = body["tenants"].as_array().expect("tenants array").len();
            assert_eq!(
                listed,
                0,
                "unmatched OIDC claim must produce empty list, got: {body}"
            );
        });
        h.shutdown(&runtime);
    }

    /// 5. JSON response shape matches what solo-web's TypeScript
    ///    client expects: `tenants[*].{id,display_name,created_at_ms,
    ///    status,quota_bytes,episode_count,size_bytes,pct_used,
    ///    last_accessed_ms}`. Catches accidental field renames at PR
    ///    time.
    ///
    ///    v0.10.1: `episode_count` / `size_bytes` / `pct_used` are
    ///    hydrated when the per-tenant DB file exists. This test
    ///    registers a tenant whose DB file does NOT exist (the
    ///    `for_tests_with_single_tenant` harness only writes the
    ///    `default` tenant's DB), so the three numeric fields must be
    ///    present with an explicit JSON `null`.
    ///
    ///    The null checks go through `Value::get` rather than
    ///    `value["key"]`: indexing a `serde_json::Value` by a missing
    ///    key yields `Null`, so `value["key"].is_null()` cannot tell
    ///    "key absent" from "key present and null". `get` returns
    ///    `None` for an absent key, which makes absence a failure.
    #[test]
    fn tenants_response_shape_matches_solo_web_types() {
        let runtime = rt();
        let h = Harness::new(&runtime);
        let r = h.router.clone();
        runtime.block_on(async {
            // Register one tenant with a display_name + quota so all
            // optional fields are present in the response.
            let tid = solo_core::TenantId::new("shaped").unwrap();
            h.registry
                .with_index(|idx| {
                    idx.register_with_quota(
                        &tid,
                        "shaped.db",
                        Some("Shaped tenant"),
                        Some(1_048_576),
                    )
                    .unwrap();
                })
                .await;
            let (status, body) = call(r, "GET", "/v1/tenants", None).await;
            assert_eq!(status, StatusCode::OK);
            let item = &body["tenants"][0];
            // id, display_name, created_at_ms, status: required
            assert_eq!(item["id"].as_str(), Some("shaped"));
            assert_eq!(item["display_name"].as_str(), Some("Shaped tenant"));
            assert!(
                item["created_at_ms"].is_i64(),
                "created_at_ms must be an i64, got {item}"
            );
            assert_eq!(item["status"].as_str(), Some("active"));
            // quota_bytes: present + numeric
            assert_eq!(item["quota_bytes"].as_u64(), Some(1_048_576));
            // v0.10.1: episode_count / size_bytes / pct_used become
            // null when the per-tenant DB file is missing on disk
            // (this harness only writes the default tenant's file —
            // shaped.db does not exist). Clients must tolerate the
            // null JSON shape; absence would be a breaking change, so
            // each key must be present (`get` is `Some`) AND null.
            assert!(
                item.get("episode_count").is_some_and(|v| v.is_null()),
                "episode_count must be JSON null when tenant DB is missing, got {item}"
            );
            assert!(
                item.get("size_bytes").is_some_and(|v| v.is_null()),
                "size_bytes must be JSON null when tenant DB is missing, got {item}"
            );
            assert!(
                item.get("pct_used").is_some_and(|v| v.is_null()),
                "pct_used must be JSON null when size_bytes is null, got {item}"
            );
        });
        h.shutdown(&runtime);
    }

    /// 6. With bearer auth switched on, a request that carries no
    ///    `Authorization` header is answered with 401. This shows the
    ///    route is wired through `auth_middleware` (it belongs to the
    ///    `authed` sub-router rather than the `public` one).
    #[test]
    fn tenants_respects_auth_when_enabled() {
        let runtime = rt();
        let harness = Harness::new_with_auth(&runtime, Some("must-auth".into()));
        let router = harness.router.clone();
        runtime.block_on(async {
            seed_three_tenants(&harness.registry).await;
            // Plain `call` (not `call_with_auth`): the request is sent
            // without credentials and must be rejected.
            let (status, _) = call(router, "GET", "/v1/tenants", None).await;
            assert_eq!(status, StatusCode::UNAUTHORIZED);
        });
        harness.shutdown(&runtime);
    }

    /// 7. Rows in `PendingMigration` or `PendingDelete` status never
    ///    reach the response; only Active tenants are listed. This
    ///    keeps solo-web's tenant picker from showing a row that is in
    ///    the middle of an admin operation.
    #[test]
    fn tenants_status_filter_excludes_non_active() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async {
            // One tenant per status; `keeper` is the only Active one
            // and therefore the only id expected on the wire.
            let active_id = solo_core::TenantId::new("keeper").unwrap();
            let migrating_id = solo_core::TenantId::new("migrating").unwrap();
            let deleting_id = solo_core::TenantId::new("deleting").unwrap();
            harness
                .registry
                .with_index(|idx| {
                    idx.register(&active_id, "keeper.db", None).unwrap();
                    let pending = [
                        (
                            &migrating_id,
                            "migrating.db",
                            solo_storage::TenantStatus::PendingMigration,
                        ),
                        (
                            &deleting_id,
                            "deleting.db",
                            solo_storage::TenantStatus::PendingDelete,
                        ),
                    ];
                    for (tid, db_file, pending_status) in pending {
                        idx.register_with_status(tid, db_file, None, pending_status)
                            .unwrap();
                    }
                })
                .await;
            let (status, body) = call(router, "GET", "/v1/tenants", None).await;
            assert_eq!(status, StatusCode::OK);
            let ids: Vec<&str> = body["tenants"]
                .as_array()
                .expect("tenants array")
                .iter()
                .filter_map(|tenant| tenant["id"].as_str())
                .collect();
            assert_eq!(
                ids,
                vec!["keeper"],
                "only Active tenants visible; got: {body}"
            );
        });
        harness.shutdown(&runtime);
    }

    /// 8. A registry with no tenants answers `200 OK` with
    ///    `tenants: []`. Guards against the list being serialised as
    ///    `None` or the route 404'ing when empty; solo-web's first
    ///    paint on a brand-new daemon relies on an empty array to show
    ///    its "no tenants yet" state.
    #[test]
    fn tenants_returns_empty_array_when_no_tenants_registered() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async {
            // Nothing is seeded on purpose: the harness's in-memory
            // index has zero rows, and the cached default-tenant handle
            // in the HashMap is not visible to `list_active`.
            let (status, body) = call(router, "GET", "/v1/tenants", None).await;
            assert_eq!(status, StatusCode::OK);
            let listed = body["tenants"].as_array().expect("tenants array");
            assert_eq!(listed.len(), 0, "expected empty array, got: {body}");
        });
        harness.shutdown(&runtime);
    }

    // ---- v0.10.1: cost-number hydration tests ----
    //
    // These exercise `TenantRegistry::hydrate_tenant_cost_numbers` end-
    // to-end through the `/v1/tenants` handler. The harness's
    // `for_tests_with_single_tenant` registry uses a plain-SQLite tenant
    // DB (not real SQLCipher); the hydration helper has a fallback
    // open path for that case (see registry.rs). The
    // `_tmp_dir/tenants/<filename>` layout matters: that's where the
    // hydration helper looks. These tests create real files there to
    // exercise the size_bytes path; episode_count requires the file to
    // be a SQLite DB with the `episodes` table.
    //
    // The `default` tenant exists at `_tmp_dir/test.db` (set by the
    // harness); the hydration helper expects `_tmp_dir/tenants/<file>`.
    // So we either (a) register a fresh tenant id pointing at a DB we
    // create at the expected layout, or (b) check the documented
    // behavior under "file missing" (returns null counts gracefully).
    // Both shapes are tested here.
    //
    // The hydration cap is the `TENANTS_COUNT_HYDRATION_CAP` constant;
    // grep for that name to find its definition.

    /// Helper: create a per-tenant DB file at the layout the hydration
    /// helper expects (`<data_dir>/tenants/<db_filename>`), populated
    /// with the `episodes` table + `n_active` active episodes +
    /// `n_forgotten` forgotten episodes. Returns the absolute path.
    ///
    /// Active rows get `memory_id` `a-<i>`, forgotten rows `f-<i>`.
    /// A non-positive count inserts no rows of that kind. All rows are
    /// written through one prepared statement inside one transaction
    /// instead of one autocommit per row.
    ///
    /// Panics (via `unwrap`) on any filesystem or SQLite error — this
    /// is test setup, so a failure here should abort the test.
    fn seed_per_tenant_db_with_episodes(
        data_dir: &std::path::Path,
        db_filename: &str,
        n_active: i64,
        n_forgotten: i64,
    ) -> std::path::PathBuf {
        let tenants_dir = data_dir.join(solo_storage::TENANTS_SUBDIR);
        std::fs::create_dir_all(&tenants_dir).unwrap();
        let db_path = tenants_dir.join(db_filename);
        // Open as plain SQLite (test path; matches the harness's
        // `open_test_db_at` shape; hydration helper falls back to plain
        // open when SQLCipher open fails).
        let mut conn = rusqlite::Connection::open(&db_path).unwrap();
        // Run the same migrations the real per-tenant DB does so the
        // `episodes` table + `status` CHECK constraint match production.
        solo_storage::run_migrations(&mut conn).unwrap();

        let tx = conn.transaction().unwrap();
        {
            // Scoped so the statement's borrow of `tx` ends before the
            // transaction is consumed by `commit`.
            let mut insert = tx
                .prepare(
                    "INSERT INTO episodes (memory_id, ts_ms, source_type, content, \
                     confidence, strength, salience, tier, status, created_at_ms, \
                     updated_at_ms) \
                     VALUES (?1, 0, 'user_message', 'x', 0.5, 0.5, 0.5, 'hot', ?2, 0, 0)",
                )
                .unwrap();
            let batches = [("a", "active", n_active), ("f", "forgotten", n_forgotten)];
            for (id_prefix, status, count) in batches {
                for i in 0..count {
                    insert
                        .execute(rusqlite::params![format!("{id_prefix}-{i}"), status])
                        .unwrap();
                }
            }
        }
        tx.commit().unwrap();
        // Close the connection before handing the path back so callers
        // that stat the file see its final on-disk state.
        drop(conn);
        db_path
    }

    /// v0.10.1 test 1: when the per-tenant DB exists, `episode_count`
    /// is hydrated with the number of active episodes. The DB is
    /// seeded with 3 active and 2 forgotten rows and the response must
    /// report `episode_count: 3`, i.e. the forgotten rows are not
    /// counted.
    #[test]
    fn tenants_response_hydrates_episode_count_when_tenant_has_data() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        let data_dir = harness._tmp.path().to_path_buf();
        runtime.block_on(async {
            seed_per_tenant_db_with_episodes(&data_dir, "counted.db", 3, 2);
            let counted = solo_core::TenantId::new("counted").unwrap();
            harness
                .registry
                .with_index(|idx| {
                    idx.register(&counted, "counted.db", Some("Counted tenant"))
                        .unwrap();
                })
                .await;

            let (status, body) = call(router, "GET", "/v1/tenants", None).await;
            assert_eq!(status, StatusCode::OK);

            let item = &body["tenants"][0];
            assert_eq!(item["id"].as_str(), Some("counted"));
            let episode_count = item["episode_count"].as_i64();
            assert_eq!(
                episode_count,
                Some(3),
                "episode_count must be 3 (active rows only, 2 forgotten excluded); got {item}"
            );
        });
        harness.shutdown(&runtime);
    }

    /// v0.10.1 test 2: `size_bytes` is the on-disk size of the
    /// per-tenant DB file. The response value must equal
    /// `std::fs::metadata(<db_path>).len()` exactly, which pins that
    /// the handler stats the tenant's own DB file rather than some
    /// other path.
    #[test]
    fn tenants_response_hydrates_size_bytes_from_db_file() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        let data_dir = harness._tmp.path().to_path_buf();
        runtime.block_on(async {
            let sized = solo_core::TenantId::new("sized").unwrap();
            let db_path =
                seed_per_tenant_db_with_episodes(&data_dir, "sized.db", 1, 0);
            harness
                .registry
                .with_index(|idx| {
                    idx.register(&sized, "sized.db", None).unwrap();
                })
                .await;

            // Reference value, taken after registration and before the
            // request is issued.
            let on_disk = std::fs::metadata(&db_path).unwrap().len();
            assert!(on_disk > 0, "test setup: db file should be non-empty");

            let (status, body) = call(router, "GET", "/v1/tenants", None).await;
            assert_eq!(status, StatusCode::OK);

            let item = &body["tenants"][0];
            assert_eq!(item["id"].as_str(), Some("sized"));
            let reported = item["size_bytes"].as_u64();
            assert_eq!(
                reported,
                Some(on_disk),
                "size_bytes must match fs::metadata; got {item}"
            );
        });
        harness.shutdown(&runtime);
    }

    /// v0.10.1 test 3: with both `size_bytes` and `quota_bytes` known,
    /// `pct_used` is `size_bytes / quota_bytes * 100`. The quota is
    /// set to four times the seeded DB's measured file size, so the
    /// expected value is about 25%; the assertion accepts 20–30% to
    /// avoid comparing floats exactly.
    #[test]
    fn tenants_response_computes_pct_used_when_quota_set() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        let data_dir = harness._tmp.path().to_path_buf();
        runtime.block_on(async {
            let quoted = solo_core::TenantId::new("quoted").unwrap();
            let db_path =
                seed_per_tenant_db_with_episodes(&data_dir, "quoted.db", 1, 0);
            // Derive the quota from the real file size so the expected
            // ratio (1/4) does not depend on how large the DB turned
            // out to be.
            let quota_bytes = std::fs::metadata(&db_path).unwrap().len() * 4;
            harness
                .registry
                .with_index(|idx| {
                    idx.register_with_quota(
                        &quoted,
                        "quoted.db",
                        None,
                        Some(quota_bytes),
                    )
                    .unwrap();
                })
                .await;

            let (status, body) = call(router, "GET", "/v1/tenants", None).await;
            assert_eq!(status, StatusCode::OK);

            let pct = body["tenants"][0]["pct_used"]
                .as_f64()
                .expect("pct_used must be a number");
            // Coarse sanity bound first, then the band around 25%.
            assert!(
                (0.0..=100.0).contains(&pct),
                "pct_used must be in [0, 100], got {pct}"
            );
            assert!(
                (20.0..=30.0).contains(&pct),
                "pct_used must be ~25% for size=quota/4, got {pct}"
            );
        });
        harness.shutdown(&runtime);
    }

    /// v0.10.1 test 4: a tenant registered without a quota (the
    /// "unlimited" case) reports `pct_used` as `null`, not a numeric
    /// `0.0` or `100.0`. `size_bytes` is still reported, since only
    /// the percentage depends on the quota.
    #[test]
    fn tenants_response_pct_used_null_when_quota_null() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        let data_dir = harness._tmp.path().to_path_buf();
        runtime.block_on(async {
            seed_per_tenant_db_with_episodes(&data_dir, "unlimited.db", 1, 0);
            let unlimited = solo_core::TenantId::new("unlimited").unwrap();
            harness
                .registry
                .with_index(|idx| {
                    idx.register(&unlimited, "unlimited.db", None).unwrap();
                })
                .await;

            let (status, body) = call(router, "GET", "/v1/tenants", None).await;
            assert_eq!(status, StatusCode::OK);

            let item = &body["tenants"][0];
            assert_eq!(item["id"].as_str(), Some("unlimited"));
            // Precondition: plain `register` leaves the quota unset.
            assert!(
                item["quota_bytes"].is_null(),
                "test setup: quota_bytes must be null, got {item}"
            );
            assert!(
                item["pct_used"].is_null(),
                "pct_used must be JSON null when quota_bytes is null, got {item}"
            );
            // The DB file exists, so its size must still be reported.
            assert!(
                item["size_bytes"].is_u64(),
                "size_bytes must still be present when quota_bytes is null, got {item}"
            );
        });
        harness.shutdown(&runtime);
    }

    /// v0.10.1 test 5: once the filtered tenant count exceeds
    /// `TENANTS_COUNT_HYDRATION_CAP`, the response carries
    /// `X-Solo-Tenants-Count-Cap-Reached: true`, and tenants past the
    /// cap report `episode_count: null`.
    ///
    /// To stay fast, no per-tenant DB files are created — only 51
    /// index rows are registered. The header is expected to depend
    /// solely on the number of filtered records, not on whether the
    /// DB files exist.
    ///
    /// NOTE(review): because no tenant DB exists in this test, and a
    /// missing DB file is documented elsewhere in this module to
    /// produce a null `episode_count`, the final null check may pass
    /// for reasons unrelated to the cap — confirm whether a seeded DB
    /// is needed to make it meaningful.
    #[test]
    fn tenants_response_sets_cap_reached_header_when_over_cap() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async {
            use axum::body::Body;
            use axum::http::Request;
            use http_body_util::BodyExt;

            // 51 rows: one more than the cap of 50.
            harness
                .registry
                .with_index(|idx| {
                    for id in (0..51).map(|i| format!("t{i:02}")) {
                        let tid = solo_core::TenantId::new(&id).unwrap();
                        idx.register(&tid, &format!("{id}.db"), None).unwrap();
                    }
                })
                .await;

            // Issue the request by hand (instead of via `call`) so the
            // response headers can be inspected.
            let request = Request::builder()
                .method("GET")
                .uri("/v1/tenants")
                .body(Body::empty())
                .unwrap();
            let response = router.oneshot(request).await.unwrap();
            assert_eq!(response.status(), StatusCode::OK);

            let cap_value = response
                .headers()
                .get(X_SOLO_TENANTS_COUNT_CAP_HEADER)
                .expect("cap-reached header must be present")
                .to_str()
                .unwrap();
            assert_eq!(
                cap_value,
                "true",
                "cap-reached header value must be 'true' when over cap"
            );

            // Decode the body and look at the entry past the cap.
            let raw = response.into_body().collect().await.unwrap().to_bytes();
            let body: serde_json::Value = serde_json::from_slice(&raw).unwrap();
            let arr = body["tenants"].as_array().expect("tenants array");
            assert_eq!(arr.len(), 51, "got {} tenants", arr.len());
            // Index 50 is the 51st entry of the list as returned, i.e.
            // the first one beyond the cap.
            let beyond_cap = &arr[50];
            assert!(
                beyond_cap["episode_count"].is_null(),
                "the 51st tenant (beyond cap) must have null episode_count, got {}",
                beyond_cap
            );
        });
        harness.shutdown(&runtime);
    }

    /// v0.10.1 test 6: below the cap, the
    /// `X-Solo-Tenants-Count-Cap-Reached` header must not be sent at
    /// all. Pinning the negative case means a refactor that always
    /// emits the header (e.g. with "false") gets caught.
    #[test]
    fn tenants_response_omits_cap_header_when_under_cap() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async {
            use axum::body::Body;
            use axum::http::Request;

            seed_three_tenants(&harness.registry).await;

            let request = Request::builder()
                .method("GET")
                .uri("/v1/tenants")
                .body(Body::empty())
                .unwrap();
            let response = router.oneshot(request).await.unwrap();
            assert_eq!(response.status(), StatusCode::OK);

            let cap_header = response.headers().get(X_SOLO_TENANTS_COUNT_CAP_HEADER);
            assert!(
                cap_header.is_none(),
                "cap-reached header must be absent under the cap"
            );
        });
        harness.shutdown(&runtime);
    }

    // ---- Pure unit tests on the visibility filter ----
    //
    // These exercise `filter_tenants_for_principal` and
    // `is_single_principal_bearer` without an axum router — fast
    // feedback for the load-bearing visibility rule. The
    // router-level tests above cover the wire path.

    /// Construct an in-memory `TenantRecord` for the pure filter tests,
    /// avoiding a real SQLCipher round-trip. The record is Active, has
    /// `<id>.db` as its DB filename, a creation timestamp of 0, and
    /// every optional field left as `None`.
    ///
    /// Panics if `id` is rejected by `TenantId::new`.
    fn make_record(id: &str) -> solo_storage::TenantRecord {
        let tenant_id = solo_core::TenantId::new(id).unwrap();
        let db_filename = format!("{id}.db");
        solo_storage::TenantRecord {
            tenant_id,
            db_filename,
            status: solo_storage::TenantStatus::Active,
            created_at_ms: 0,
            display_name: None,
            quota_bytes: None,
            last_accessed_ms: None,
        }
    }

    /// When no principal is supplied, the filter returns every record
    /// in its original order.
    #[test]
    fn filter_no_principal_returns_all() {
        let records = vec![make_record("a"), make_record("b")];
        // `records` is not used again, so it is moved into the filter
        // rather than cloned.
        let out = filter_tenants_for_principal(records, None);
        assert_eq!(out.len(), 2);
        assert_eq!(out[0].tenant_id.as_str(), "a");
        assert_eq!(out[1].tenant_id.as_str(), "b");
    }

    /// A principal built with `AuthenticatedPrincipal::bearer` sees
    /// every record, not just the one matching its tenant id.
    #[test]
    fn filter_bearer_principal_returns_all() {
        let principal = AuthenticatedPrincipal::bearer(
            solo_core::TenantId::new("a").unwrap(),
        );
        let visible = filter_tenants_for_principal(
            vec![make_record("a"), make_record("b")],
            Some(&principal),
        );
        assert_eq!(visible.len(), 2);
    }

    /// An OIDC-shaped principal (non-bearer subject, JSON-object
    /// claims) only sees the record whose id equals its
    /// `tenant_claim`.
    #[test]
    fn filter_oidc_principal_keeps_only_claim() {
        let principal = AuthenticatedPrincipal {
            subject: "alice@example.com".to_string(),
            tenant_claim: Some(solo_core::TenantId::new("b").unwrap()),
            scopes: vec!["read".to_string()],
            claims: serde_json::json!({ "sub": "alice@example.com" }),
        };
        let candidates = vec![make_record("a"), make_record("b"), make_record("c")];
        let visible = filter_tenants_for_principal(candidates, Some(&principal));
        assert_eq!(visible.len(), 1);
        assert_eq!(visible[0].tenant_id.as_str(), "b");
    }

    /// An OIDC-shaped principal that carries no `tenant_claim` sees
    /// nothing. In practice this should not be reachable — the
    /// middleware is expected to reject such a principal with 403
    /// first — but the filter is checked defensively anyway.
    #[test]
    fn filter_oidc_principal_with_no_claim_returns_empty() {
        let claimless = AuthenticatedPrincipal {
            subject: "alice@example.com".to_string(),
            tenant_claim: None,
            scopes: vec![],
            claims: serde_json::json!({ "sub": "alice@example.com" }),
        };
        let visible =
            filter_tenants_for_principal(vec![make_record("a")], Some(&claimless));
        assert!(visible.is_empty());
    }

    /// `is_single_principal_bearer` accepts a principal built by
    /// `AuthenticatedPrincipal::bearer` and rejects both an
    /// OIDC-shaped principal and a principal that merely reuses the
    /// `"bearer"` subject while carrying object claims.
    #[test]
    fn is_single_principal_bearer_discriminator() {
        // Genuine bearer principal → accepted.
        let genuine = AuthenticatedPrincipal::bearer(
            solo_core::TenantId::new("default").unwrap(),
        );
        assert!(is_single_principal_bearer(&genuine));

        // Ordinary OIDC-shaped principal → rejected.
        let oidc_like = AuthenticatedPrincipal {
            subject: "alice".to_string(),
            tenant_claim: Some(solo_core::TenantId::new("alice").unwrap()),
            scopes: vec![],
            claims: serde_json::json!({ "x": 1 }),
        };
        assert!(!is_single_principal_bearer(&oidc_like));

        // Subject is literally "bearer" but the claims are a non-null
        // object: must not count as bearer-shaped. Guards against a
        // forged bearer look-alike smuggling JWT claims through.
        let forged = AuthenticatedPrincipal {
            subject: "bearer".to_string(),
            tenant_claim: Some(solo_core::TenantId::default_tenant()),
            scopes: vec![],
            claims: serde_json::json!({ "leak": 1 }),
        };
        assert!(!is_single_principal_bearer(&forged));
    }

    // ---------------------------------------------------------------
    // v0.10.2 — MCP-over-HTTP transport on /mcp
    // ---------------------------------------------------------------
    //
    // These tests pin the wire contract for the new `/mcp` route added
    // in v0.10.2 P2. We exercise the route through the same `Harness`
    // pattern the rest of the file uses (in-process axum Router via
    // `tower::ServiceExt::oneshot`) — no real TCP listener needed.
    //
    // The dispatcher's unit tests live in `mcp_dispatch::tests` and
    // cover the JSON-RPC envelope shape in isolation. These tests are
    // the integration layer: real `TenantHandle`, real `WriterActor`,
    // real `SoloMcpServer::dispatch_tool` path.

    /// A `tools/list` JSON-RPC request posted to `/mcp` answers with
    /// the 14 canonical tool names. The expected set mirrors the stdio
    /// smoke test `mcp_stdio_lists_fourteen_canonical_tools` in
    /// `crates/solo-cli/tests/mcp_smoke.rs`, so drift between the two
    /// transports makes one of the suites fail.
    #[test]
    fn mcp_http_tools_list_returns_fourteen_canonical_tools() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async move {
            let request = json!({
                "jsonrpc": "2.0",
                "id": 1,
                "method": "tools/list",
            });
            let (status, body) = call(router, "POST", "/mcp", Some(request)).await;
            assert_eq!(status, StatusCode::OK);

            // The JSON-RPC envelope echoes the version and request id.
            assert_eq!(body.get("jsonrpc").and_then(|v| v.as_str()), Some("2.0"));
            assert_eq!(body.get("id").and_then(|v| v.as_i64()), Some(1));

            let tools = body
                .pointer("/result/tools")
                .and_then(|v| v.as_array())
                .unwrap_or_else(|| panic!("missing /result/tools: {body}"));
            // Compare order-independently: gather the names and sort.
            let mut names: Vec<&str> = tools
                .iter()
                .filter_map(|tool| tool.get("name").and_then(|n| n.as_str()))
                .collect();
            names.sort();
            assert_eq!(
                names,
                vec![
                    "memory_contradictions",
                    "memory_facts_about",
                    "memory_forget",
                    "memory_forget_document",
                    "memory_ingest_document",
                    "memory_inspect",
                    "memory_inspect_cluster",
                    "memory_inspect_document",
                    "memory_list_documents",
                    "memory_recall",
                    "memory_remember",
                    "memory_remember_batch",
                    "memory_search_docs",
                    "memory_themes",
                ],
                "mcp_http: tools/list returned unexpected name set"
            );
        });
        harness.shutdown(&runtime);
    }

    /// A `tools/call` for `memory_remember` over `/mcp` stores the
    /// episode and answers with a confirmation string. A follow-up
    /// `GET /v1/graph/nodes` on the REST surface must then list that
    /// episode, showing that both surfaces are served by one process
    /// against the same writer.
    #[test]
    fn mcp_http_remember_writes_episode_visible_via_graph_nodes() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async move {
            let marker = "mcp-http-cross-surface-smoke";

            // Step 1: write through the MCP transport.
            let remember = json!({
                "jsonrpc": "2.0",
                "id": 2,
                "method": "tools/call",
                "params": {
                    "name": "memory_remember",
                    "arguments": { "content": marker },
                },
            });
            let (status, body) =
                call(router.clone(), "POST", "/mcp", Some(remember)).await;
            assert_eq!(status, StatusCode::OK);
            let result_text = body
                .pointer("/result/content/0/text")
                .and_then(|v| v.as_str())
                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
            assert!(
                result_text.starts_with("remembered "),
                "expected `remembered <id>`, got: {result_text}"
            );

            // Step 2: read back through REST. Episode nodes expose
            // their content via `label` and/or `preview` (the v0.10.0
            // graph-nodes wire shape), so a hit in either field counts.
            let (status2, nodes_body) = call(
                router,
                "GET",
                "/v1/graph/nodes?kind=episode&limit=10",
                None,
            )
            .await;
            assert_eq!(status2, StatusCode::OK);
            let nodes = nodes_body
                .get("nodes")
                .and_then(|v| v.as_array())
                .unwrap_or_else(|| panic!("missing nodes: {nodes_body}"));
            let surfaced = nodes.iter().any(|node| {
                ["label", "preview"].iter().any(|field| {
                    node.get(*field)
                        .and_then(|text| text.as_str())
                        .is_some_and(|text| text.contains(marker))
                })
            });
            assert!(
                surfaced,
                "graph/nodes didn't surface the MCP-written episode: {nodes_body}"
            );
        });
        harness.shutdown(&runtime);
    }

    /// Read-path smoke for the `/mcp` transport: remember an episode
    /// through `tools/call` → `memory_remember`, then check that a
    /// `memory_recall` for the same text over `/mcp` returns it.
    #[test]
    fn mcp_http_recall_returns_just_remembered_episode() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async move {
            let needle = "mcp-http-recall-needle-deadbeef";

            // Write the needle.
            let remember = json!({
                "jsonrpc": "2.0",
                "id": 3,
                "method": "tools/call",
                "params": {
                    "name": "memory_remember",
                    "arguments": { "content": needle },
                },
            });
            let (remember_status, _) =
                call(router.clone(), "POST", "/mcp", Some(remember)).await;
            assert_eq!(remember_status, StatusCode::OK);

            // Query for it over the same transport.
            let recall = json!({
                "jsonrpc": "2.0",
                "id": 4,
                "method": "tools/call",
                "params": {
                    "name": "memory_recall",
                    "arguments": { "query": needle, "limit": 5 },
                },
            });
            let (recall_status, body) =
                call(router, "POST", "/mcp", Some(recall)).await;
            assert_eq!(recall_status, StatusCode::OK);

            let recall_text = body
                .pointer("/result/content/0/text")
                .and_then(|v| v.as_str())
                .unwrap_or_else(|| panic!("missing /result/content/0/text: {body}"));
            assert!(
                recall_text.contains(needle),
                "recall didn't surface needle `{needle}`: {recall_text}"
            );
        });
        harness.shutdown(&runtime);
    }

    /// A request body that is not JSON must be rejected with HTTP 400:
    /// the wire envelope is invalid, so the JSON-RPC layer never sees
    /// the request. The response body is checked for a string `error`
    /// field mentioning `invalid JSON-RPC request`, matching the
    /// `{error, status}` shape used by the rest of the API so existing
    /// client error handling keeps working.
    #[test]
    fn mcp_http_malformed_body_returns_400() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async move {
            // Built by hand so the body can be raw, non-JSON bytes while
            // still advertising a JSON content type.
            let garbage = Body::from("not-json-at-all".as_bytes());
            let request = Request::builder()
                .method("POST")
                .uri("/mcp")
                .header("content-type", "application/json")
                .body(garbage)
                .unwrap();

            let resp = router.oneshot(request).await.unwrap();
            assert_eq!(resp.status(), StatusCode::BAD_REQUEST);

            // The error payload itself must still be well-formed JSON.
            let body_bytes = resp.into_body().collect().await.unwrap().to_bytes();
            let v: Value = serde_json::from_slice(&body_bytes).unwrap();
            let error_names_rpc_failure = v
                .get("error")
                .and_then(Value::as_str)
                .is_some_and(|msg| msg.contains("invalid JSON-RPC request"));
            assert!(error_names_rpc_failure, "got: {v}");
        });
        harness.shutdown(&runtime);
    }

    /// An envelope whose `jsonrpc` field is anything other than the
    /// literal `"2.0"` (required by JSON-RPC 2.0 §4) must be rejected
    /// with HTTP 400.
    #[test]
    fn mcp_http_wrong_jsonrpc_version_returns_400() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async move {
            // A `tools/list` call that is well-formed apart from the
            // version tag.
            let bad_version = json!({
                "jsonrpc": "1.0",
                "id": 1,
                "method": "tools/list",
            });
            let (status, _) = call(router, "POST", "/mcp", Some(bad_version)).await;
            assert_eq!(status, StatusCode::BAD_REQUEST);
        });
        harness.shutdown(&runtime);
    }

    /// A syntactically valid envelope naming a method the server does
    /// not know yields HTTP 200 with a JSON-RPC error object whose
    /// `code` is -32601 (METHOD_NOT_FOUND). The HTTP status stays 200
    /// because the envelope parsed fine — JSON-RPC errors travel
    /// in-body.
    #[test]
    fn mcp_http_unknown_method_returns_in_body_method_not_found() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async move {
            let bogus_call = json!({
                "jsonrpc": "2.0",
                "id": 5,
                "method": "definitely/not/a/method",
            });
            let (status, body) = call(router, "POST", "/mcp", Some(bogus_call)).await;
            assert_eq!(status, StatusCode::OK);

            // The failure is reported inside the reply, not via HTTP.
            let error_code = body.pointer("/error/code").and_then(Value::as_i64);
            assert_eq!(
                error_code,
                Some(-32601),
                "expected JSON-RPC METHOD_NOT_FOUND (-32601), got: {body}"
            );
        });
        harness.shutdown(&runtime);
    }

    /// With the harness built via `Harness::new_with_auth` and a
    /// token, `POST /mcp` must answer 401 when no `Authorization`
    /// header is sent and 200 when the matching `Bearer` token is
    /// supplied. The authenticated `tools/list` reply is additionally
    /// pinned to 14 tools.
    #[test]
    fn mcp_http_post_respects_bearer_auth() {
        let runtime = rt();
        let harness = Harness::new_with_auth(&runtime, Some("secret-mcp-token".into()));
        let router = harness.router.clone();
        runtime.block_on(async move {
            // The same envelope is sent twice; only the auth header differs.
            let list_tools = json!({
                "jsonrpc": "2.0",
                "id": 6,
                "method": "tools/list",
            });

            // Anonymous attempt → rejected.
            let (anon_status, _) =
                call(router.clone(), "POST", "/mcp", Some(list_tools.clone())).await;
            assert_eq!(anon_status, StatusCode::UNAUTHORIZED);

            // Authenticated attempt → accepted, with a valid JSON-RPC reply.
            let (authed_status, body) = call_with_auth(
                router,
                "POST",
                "/mcp",
                Some(list_tools),
                Some("Bearer secret-mcp-token"),
            )
            .await;
            assert_eq!(authed_status, StatusCode::OK);

            let tool_count = body
                .pointer("/result/tools")
                .and_then(Value::as_array)
                .map(Vec::len);
            assert_eq!(
                tool_count,
                Some(14),
                "authed tools/list should still return 14 tools: {body}"
            );
        });
        harness.shutdown(&runtime);
    }

    /// Sends a CORS preflight (`OPTIONS /mcp`) from the origin
    /// `http://localhost:5173` and expects:
    ///
    ///   - HTTP 200 (tower-http's CorsLayer handles preflight
    ///     implicitly),
    ///   - `access-control-allow-headers` naming both `mcp-session-id`
    ///     and `x-solo-tenant` (compared case-insensitively),
    ///   - `access-control-allow-origin` echoing the request origin.
    ///
    /// Pins the v0.10.2 allow-list addition.
    #[test]
    fn mcp_http_cors_preflight_allows_mcp_session_id_header() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async move {
            // Headers the simulated browser asks permission to send.
            let requested_headers = "content-type, mcp-session-id, x-solo-tenant, authorization";
            let preflight = Request::builder()
                .method("OPTIONS")
                .uri("/mcp")
                .header("origin", "http://localhost:5173")
                .header("access-control-request-method", "POST")
                .header("access-control-request-headers", requested_headers)
                .body(Body::empty())
                .unwrap();

            let resp = router.oneshot(preflight).await.unwrap();
            // A permitted preflight is answered with 200 by the CORS layer.
            assert_eq!(resp.status(), StatusCode::OK);

            let headers = resp.headers();

            // Lower-cased so the substring checks ignore header-name casing;
            // a missing or non-UTF-8 header degrades to "" and fails below.
            let allow_headers = headers
                .get("access-control-allow-headers")
                .and_then(|value| value.to_str().ok())
                .unwrap_or("")
                .to_lowercase();
            assert!(
                allow_headers.contains("mcp-session-id"),
                "preflight allow-headers must include mcp-session-id; got: {allow_headers}"
            );
            assert!(
                allow_headers.contains("x-solo-tenant"),
                "preflight allow-headers must still include x-solo-tenant; got: {allow_headers}"
            );

            // The localhost origin must be echoed back verbatim (per the
            // permissive-localhost predicate).
            let allow_origin = headers
                .get("access-control-allow-origin")
                .and_then(|value| value.to_str().ok())
                .unwrap_or("");
            assert_eq!(allow_origin, "http://localhost:5173");
        });
        harness.shutdown(&runtime);
    }

    /// A JSON-RPC notification (an envelope without `id`) is answered
    /// with 202 Accepted and no body: per JSON-RPC 2.0 §4.1 the server
    /// MUST NOT reply to notifications.
    #[test]
    fn mcp_http_notification_returns_202_accepted() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async move {
            // No `id` field — that is what makes this a notification.
            let notification = json!({
                "jsonrpc": "2.0",
                "method": "notifications/initialized",
                "params": {},
            });
            let (status, body) = call(router, "POST", "/mcp", Some(notification)).await;
            assert_eq!(status, StatusCode::ACCEPTED);
            // `call()` maps an empty response body to `Value::Null`, so
            // this is the "nothing was sent back" check.
            assert_eq!(body, Value::Null);
        });
        harness.shutdown(&runtime);
    }

    /// `initialize` over HTTP must report `serverInfo.name == "solo"`,
    /// the identity pinned for stdio by the invariant test
    /// `server_info_identity_is_solo_not_rmcp_or_solo_api`. Guards
    /// against the v0.10.2 HTTP transport drifting away from the stdio
    /// identity, and pins the HTTP-side `protocolVersion`.
    #[test]
    fn mcp_http_initialize_returns_solo_server_info() {
        let runtime = rt();
        let harness = Harness::new(&runtime);
        let router = harness.router.clone();
        runtime.block_on(async move {
            let handshake = json!({
                "jsonrpc": "2.0",
                "id": 7,
                "method": "initialize",
                "params": {
                    "protocolVersion": "2024-11-05",
                    "capabilities": {},
                    "clientInfo": { "name": "solo-http-test", "version": "0.0.0" },
                },
            });
            let (status, body) = call(router, "POST", "/mcp", Some(handshake)).await;
            assert_eq!(status, StatusCode::OK);

            let server_name = body
                .pointer("/result/serverInfo/name")
                .and_then(Value::as_str);
            assert_eq!(
                server_name,
                Some("solo"),
                "serverInfo.name must be `solo`, not `solo-api` or `rmcp`; got: {body}"
            );

            // The dispatcher currently emits a static `protocolVersion`
            // (2024-11-05), while the stdio loop emits rmcp's own
            // default. Keeping the two aligned is deferred to the
            // v0.10.3+ session work; for v0.10.2 only the HTTP-side
            // value is pinned here.
            let protocol_version = body
                .pointer("/result/protocolVersion")
                .and_then(Value::as_str);
            assert_eq!(protocol_version, Some("2024-11-05"));
        });
        harness.shutdown(&runtime);
    }
}

#[cfg(test)]
mod cors_tests {
    //! Table-driven checks for `is_localhost_origin`: each test walks a
    //! list of origin strings and expects a uniform verdict for all of
    //! them.

    use super::is_localhost_origin;

    /// `http`/`https` origins on `localhost`, `127.0.0.1` and `[::1]`,
    /// with or without a port, are accepted.
    #[test]
    fn accepts_canonical_localhost_origins() {
        for origin in [
            "http://localhost",
            "http://localhost:3000",
            "https://localhost:8443",
            "http://127.0.0.1",
            "http://127.0.0.1:5173",
            "http://[::1]",
            "http://[::1]:8080",
        ] {
            assert!(
                is_localhost_origin(origin),
                "expected origin to be accepted: {origin}"
            );
        }
    }

    /// Public hostnames and private-LAN addresses are rejected.
    #[test]
    fn rejects_remote_origins() {
        for origin in [
            "http://example.com",
            "https://malicious.example",
            "http://192.168.1.5",
            "http://10.0.0.1",
        ] {
            assert!(
                !is_localhost_origin(origin),
                "expected origin to be rejected: {origin}"
            );
        }
    }

    /// Hostnames that merely contain a loopback-looking label are
    /// rejected. Services such as nip.io provide DNS names that
    /// resolve to 127.0.0.1 while the Origin header still carries the
    /// public-DNS name; refusing them closes the rebinding-via-Origin
    /// gap.
    #[test]
    fn rejects_dns_rebinding_tricks() {
        for origin in [
            "http://127.0.0.1.nip.io",
            "http://localhost.evil.com",
            "http://evil.localhost",
        ] {
            assert!(
                !is_localhost_origin(origin),
                "expected origin to be rejected: {origin}"
            );
        }
    }

    /// Schemes other than `http`/`https` are rejected even when the
    /// host is `localhost`.
    #[test]
    fn rejects_non_http_schemes() {
        for origin in ["file:///", "ws://localhost:3000", "javascript:alert(1)"] {
            assert!(
                !is_localhost_origin(origin),
                "expected origin to be rejected: {origin}"
            );
        }
    }

    /// Empty, scheme-less and protocol-relative strings are rejected.
    #[test]
    fn rejects_malformed() {
        for origin in ["", "localhost", "//localhost"] {
            assert!(
                !is_localhost_origin(origin),
                "expected origin to be rejected: {origin:?}"
            );
        }
    }
}