solo-api 0.11.5

// SPDX-License-Identifier: Apache-2.0

//! MCP (Model Context Protocol) server for Solo.
//!
//! Exposes eighteen tools to MCP clients (Claude Desktop, Cursor, etc.):
//!
//! Episode tools (v0.1+, with v0.9.2 additions):
//!   - `memory_remember(content, source_type?, source_id?, salience?)` —
//!     store an episode. Returns the new MemoryId. `salience` (v0.9.2+)
//!     is optional in [0.0, 1.0] and defaults to 0.5.
//!   - `memory_remember_batch(items)` (v0.9.2+) — atomically store N
//!     episodes in one writer-actor transaction. Each item has the
//!     same fields as `memory_remember`. Returns an ordered array of
//!     MemoryIds; either all items persist or none do.
//!   - `memory_recall(query, limit?)` — vector search. Returns the top-K
//!     matches with content + tier + status.
//!   - `memory_context(query, subject?, window_days?, limit?)` — combined
//!     recall + themes + facts + contradictions bundle for agent context.
//!   - `memory_update(memory_id, content)` — correct/supersede an active
//!     episode's content and refresh its embedding/index row.
//!   - `memory_forget(memory_id, reason?)` — soft-delete an episode.
//!   - `memory_inspect(memory_id)` — return the full episode record.
//!
//! Derived-layer tools (v0.4.0+):
//!   - `memory_themes(window_days?, limit?)` — list cluster themes.
//!   - `memory_facts_about(subject, ...)` — query the structured-fact
//!     knowledge graph (subject-predicate-object triples).
//!   - `memory_entities(query, limit?)` — discover entity ids from the
//!     structured-fact graph.
//!   - `memory_contradictions(limit?)` — disagreements flagged during
//!     consolidation.
//!   - `memory_contradiction_resolve(...)` — mark a contradiction resolved,
//!     unresolved, or reopened.
//!
//! Derived-layer tools (v0.5.0+):
//!   - `memory_inspect_cluster(cluster_id, full_content?)` — drill
//!     into one cluster's abstraction + source episodes (truncated).
//!
//! Document tools (v0.7.0+):
//!   - `memory_ingest_document(path)` — read a file from disk, split it
//!     into chunks, embed each, and store under documents/document_chunks.
//!   - `memory_search_docs(query, limit?)` — vector search restricted to
//!     document chunks; returns chunk content + parent-doc context.
//!   - `memory_inspect_document(doc_id)` — show one document's metadata
//!     plus a previewed list of its chunks.
//!   - `memory_list_documents(limit?, offset?, include_forgotten?)` —
//!     paginate over ingested documents, newest first.
//!   - `memory_forget_document(doc_id)` — soft-delete a document; chunks
//!     stop appearing in `memory_search_docs` and tombstone in HNSW.
//!
//! ## Transport
//!
//! `serve_stdio` wires the server to stdin/stdout for use as a subprocess
//! ("`claude_desktop_config.json` or `~/.cursor/mcp.json` invokes
//! `solo mcp-stdio`"). The function awaits a graceful shutdown when stdin
//! closes (parent disconnects) — same lifecycle as `solo daemon`'s
//! Ctrl+C path.
//!
//! ## What's deferred
//!
//! - SSE/HTTP transports — `rmcp` ships them, but v0.1 ships stdio only.
//! - `prompts/` and `resources/` capabilities — not needed for the
//!   current tools-only surface; ServerHandler defaults return empty lists.
//! - Tool argument validation beyond JSON Schema typing — we trust rmcp
//!   to deserialize per the schema, then serde-deserialize into our
//!   typed param structs. Bad inputs surface as clear errors.

use std::sync::Arc;

use rmcp::handler::server::ServerHandler;
use rmcp::model::{
    CallToolRequestParams as CallToolRequestParam, CallToolResult, Content, Implementation,
    InitializeRequestParams, InitializeResult, ListToolsResult,
    PaginatedRequestParams as PaginatedRequestParam, ProtocolVersion, ServerCapabilities,
    ServerInfo, Tool,
};
use rmcp::service::{RequestContext, RoleServer};
use rmcp::{ErrorData as McpError, ServiceExt};
use serde::{Deserialize, Serialize};
use solo_core::{Confidence, DocumentId, EncodingContext, Episode, MemoryId, Tier};
use solo_storage::{TenantHandle, TenantRegistry};
use std::str::FromStr;

/// The MCP server. Cheap to clone — every field is `Arc`-cloneable.
///
/// v0.8.0 P2: an MCP session resolves to **one tenant**. The session's
/// `tenant_handle` is resolved at `initialize` time (today: from the
/// CLI invocation via `solo mcp-stdio --tenant <id>`; future versions
/// may resolve per-bearer-token via OIDC). Subsequent `tools/call`
/// invocations route through the cached handle without re-resolving.
/// Operators that need multi-tenant MCP spawn one `solo mcp-stdio`
/// subprocess per tenant.
#[derive(Clone)]
pub struct SoloMcpServer {
    /// Shared session state. Cloning the server only clones this `Arc`,
    /// so every clone observes the same tenant handle, alias list and
    /// audit principal.
    inner: Arc<Inner>,
}

/// Per-session state behind [`SoloMcpServer`]'s `Arc`. All four fields
/// are set once, by `SoloMcpServer::new_for_tenant_with_principal`.
struct Inner {
    /// Multi-tenant registry shared across all sessions. Held so that a
    /// future MCP capability that lists/inspects other tenants has a
    /// path to them (out of scope for v0.8.0 P2). P3 (auth) will use
    /// this to re-resolve the tenant from a bearer-token claim.
    // Not read by any handler yet, hence the lint suppression below.
    #[allow(dead_code)]
    registry: Arc<TenantRegistry>,
    /// The tenant this MCP session speaks for. Resolved at session
    /// construction time.
    tenant: Arc<TenantHandle>,
    /// Read-path aliases for the canonical `"user"` subject. Sourced
    /// from `solo.config.toml` `[identity] user_aliases`; threaded
    /// through to `solo_query::facts_about` so a query for `"alex"`
    /// also surfaces rows historically extracted as `"user"`. Empty
    /// vec = behave as today (no expansion).
    user_aliases: Vec<String>,
    /// v0.8.0 P4 audit-log principal for this MCP session. MCP is
    /// bearer-only (no OIDC story in the spec), so the principal is
    /// effectively `"bearer"` when the daemon was started with
    /// `--bearer-token-file` and `None` otherwise. Persisted here so
    /// every tool dispatch threads it into the audit emit without
    /// reconstructing it per call.
    ///
    /// Since v0.8.1, `SoloMcpServer::new_for_tenant` fills this from
    /// `resolve_mcp_principal(None)`, i.e. from the trimmed value of the
    /// `SOLO_MCP_PRINCIPAL_TOKEN` env var when it is non-empty.
    audit_principal: Option<String>,
}

/// v0.9.0 P2: outcome of inspecting the tenant's `[llm]` config + the
/// peer's `sampling` capability at MCP `initialize` time.
///
/// Separating the decision from the actual slot write makes the
/// gating logic unit-testable without needing a real
/// `rmcp::Peer<RoleServer>` (whose constructors are private).
/// `SoloMcpServer::initialize` performs the match and routes to the
/// side-effect path; tests pin the table directly.
// `Copy` + `Eq`: the decision is a plain three-way tag, so it can be
// passed by value and compared directly (e.g. in table-style tests).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InitializeDecision {
    /// Tenant's LLM backend doesn't require an MCP peer; the slot was
    /// populated eagerly at registry-open time (or stays `None` for
    /// `LlmConfig::None`). MCP initialize succeeds without writing the
    /// slot.
    ///
    /// Also the outcome when the tenant has no `[llm]` settings at all.
    Allow,
    /// Tenant's LLM backend is `mcp_sampling` AND the peer advertised
    /// the `sampling` capability. `populate_sampling_steward` writes a
    /// peer-bound Steward into the slot.
    PopulateSamplingSteward,
    /// Tenant's LLM backend is `mcp_sampling` but the peer did NOT
    /// advertise the `sampling` capability. MCP initialize must refuse
    /// with the locked BLOCKER 2 error message
    /// (`sampling_capability_missing_error_message`).
    RejectMissingSamplingCapability,
}

/// v0.9.0 P2: map the tenant's `[llm]` settings and the peer's
/// `sampling` capability flag onto an [`InitializeDecision`].
///
/// * No settings, or settings whose `requires_mcp_peer()` is false →
///   [`InitializeDecision::Allow`], regardless of the peer flag.
/// * Peer-requiring settings and `peer_sampling_supported == true` →
///   [`InitializeDecision::PopulateSamplingSteward`].
/// * Peer-requiring settings and `peer_sampling_supported == false` →
///   [`InitializeDecision::RejectMissingSamplingCapability`].
///
/// Pure function — no side effects, no rmcp peer required. Pinned by
/// the `initialize_decision_*` tests.
pub fn initialize_decision(
    llm_settings: &Option<solo_storage::LlmSettings>,
    peer_sampling_supported: bool,
) -> InitializeDecision {
    let backend_needs_peer =
        matches!(llm_settings, Some(settings) if settings.requires_mcp_peer());

    match (backend_needs_peer, peer_sampling_supported) {
        (false, _) => InitializeDecision::Allow,
        (true, true) => InitializeDecision::PopulateSamplingSteward,
        (true, false) => InitializeDecision::RejectMissingSamplingCapability,
    }
}

/// v0.9.0 P2: the locked error text shared by the daemon-startup
/// rejection guard and the MCP `initialize` capability gate (plan §3
/// Decision 4 / BLOCKER 2 resolution).
///
/// The text is returned verbatim to the operator: a four-line
/// explanation followed by four ready-to-paste `[llm]` TOML
/// alternatives (Anthropic, OpenAI, Ollama, none) and a pointer to the
/// release notes. Lines are separated by `\n`; there is no trailing
/// newline.
///
/// Kept at module scope so the daemon startup path (in `solo-cli`) and
/// `SoloMcpServer::initialize` draw from a single copy of the wording.
pub fn sampling_capability_missing_error_message() -> String {
    // Assembled at compile time; every line but the last carries its own
    // explicit newline so the result matches a "\n"-joined line list.
    const MESSAGE: &str = concat!(
        "LLM backend `mcp_sampling` requires a connected MCP client that\n",
        "advertises the `sampling` capability at initialize. Either the\n",
        "current MCP client does not support sampling, or this Solo\n",
        "process is running in daemon-only mode (no peer to call back).\n",
        "\n",
        "Pick one of:\n",
        "\n",
        "  # Anthropic (hosted):\n",
        "  [llm]\n",
        "  mode = \"anthropic\"\n",
        "  api_key_env = \"ANTHROPIC_API_KEY\"\n",
        "  model = \"claude-sonnet-4-6\"\n",
        "\n",
        "  # OpenAI (hosted):\n",
        "  [llm]\n",
        "  mode = \"openai\"\n",
        "  api_key_env = \"OPENAI_API_KEY\"\n",
        "  model = \"gpt-5o\"\n",
        "\n",
        "  # Ollama (local daemon):\n",
        "  [llm]\n",
        "  mode = \"ollama\"\n",
        "  base_url = \"http://localhost:11434\"\n",
        "  model = \"qwen3-coder:30b\"\n",
        "\n",
        "  # None (cluster-only; abstractions skipped):\n",
        "  [llm]\n",
        "  mode = \"none\"\n",
        "\n",
        "See docs/releases/v0.9.0.md \u{00a7}LLM-backend selection for details.",
    );
    MESSAGE.to_owned()
}

/// v0.8.1 P2: env var name MCP clients set when launching the server
/// process to attribute audit rows on the stdio transport. Closes the
/// v0.8.0 known-issue gap where MCP audit rows always carried
/// `principal_subject = NULL` on the daemon path.
///
/// Precedence (when the future HTTP-MCP transport lands):
///   1. `Authorization: Bearer <token>` header on the HTTP-MCP request
///      (resolved through `AuthConfig::Bearer` validator).
///   2. `SOLO_MCP_PRINCIPAL_TOKEN` env var on the spawned process.
///
/// For the v0.8.x stdio-only world only the env-var path applies; the
/// header path is a no-op (no HTTP transport wired). The constant lives
/// at module scope so external callers (CLI subcommand, tests) reference
/// it by name rather than re-typing the string literal.
pub const ENV_MCP_PRINCIPAL_TOKEN: &str = "SOLO_MCP_PRINCIPAL_TOKEN";

/// v0.8.1 P2: resolve the MCP-session principal at `initialize`-time.
///
/// Two sources are consulted, in order:
///
///   1. `header_value` — the raw `Authorization` header value a future
///      HTTP-MCP transport would pass in. It is used when it has the
///      form `<scheme> <token>` with scheme `Bearer` and a non-blank
///      token. The scheme is compared ASCII case-insensitively, so
///      `bearer <token>` and `BEARER <token>` are accepted as well (HTTP
///      auth schemes are case-insensitive).
///   2. The `SOLO_MCP_PRINCIPAL_TOKEN` env var (stdio path).
///
/// The header beats the env when both are usable. A header that is
/// absent, uses another scheme, or carries a blank token falls through
/// to the env var.
///
/// Returns `Some(subject)` with surrounding whitespace trimmed, or
/// `None` when neither source carries a non-empty value. Empty /
/// whitespace-only values are treated as absent so an accidentally-set
/// `SOLO_MCP_PRINCIPAL_TOKEN=""` in a launcher script doesn't pin every
/// audit row to a blank principal. An env var that is not valid Unicode
/// is treated as absent too.
///
/// The current implementation treats the token value as the principal
/// subject directly. A future hardening pass can validate against the
/// daemon's `[auth] bearer.token` config to refuse mismatched tokens —
/// today the env var is operator-trusted (same trust model as
/// `SOLO_PASSPHRASE`).
pub fn resolve_mcp_principal(header_value: Option<&str>) -> Option<String> {
    /// Pull the non-blank token out of a `Bearer <token>` header value.
    fn bearer_token(header: &str) -> Option<&str> {
        let (scheme, credentials) = header.split_once(' ')?;
        if !scheme.eq_ignore_ascii_case("Bearer") {
            return None;
        }
        let token = credentials.trim();
        if token.is_empty() {
            None
        } else {
            Some(token)
        }
    }

    // HTTP-MCP path wins when it yields a usable token. The *value* is
    // the principal subject in v0.8.1; v0.8.2+ may validate against a
    // configured token set and surface the JWT `sub` claim instead.
    if let Some(token) = header_value.and_then(bearer_token) {
        return Some(token.to_owned());
    }

    // Stdio env-var fallback; a blank value counts as unset.
    std::env::var(ENV_MCP_PRINCIPAL_TOKEN)
        .ok()
        .map(|raw| raw.trim().to_owned())
        .filter(|subject| !subject.is_empty())
}

impl SoloMcpServer {
    /// Build a server speaking for `tenant` (v0.8.0 P2 — one MCP session
    /// ↔ one tenant). The registry is only stored; today every handler
    /// routes through `self.inner.tenant`.
    ///
    /// v0.8.1 P2: the audit principal is auto-resolved by calling
    /// [`resolve_mcp_principal`] with no header, i.e. from the
    /// `SOLO_MCP_PRINCIPAL_TOKEN` env var. When that yields nothing the
    /// principal stays `None` — preserving v0.8.0 behavior for
    /// single-user setups.
    pub fn new_for_tenant(
        registry: Arc<TenantRegistry>,
        tenant: Arc<TenantHandle>,
        user_aliases: Vec<String>,
    ) -> Self {
        Self::new_for_tenant_with_principal(
            registry,
            tenant,
            user_aliases,
            resolve_mcp_principal(None),
        )
    }

    /// v0.8.0 P4: like [`Self::new_for_tenant`], but the caller supplies
    /// the audit principal recorded for every tool dispatch. MCP is
    /// bearer-only at v0.8.0 — the orchestration layer (today: the
    /// daemon's `--bearer-token-file` path) decides whether a session
    /// counts as "bearer-authenticated" and passes `Some("bearer")`;
    /// CLI / unauth paths pass `None`.
    ///
    /// This constructor stores `audit_principal` exactly as given and
    /// never reads the environment: the v0.8.1 env-var auto-resolution
    /// happens only in [`Self::new_for_tenant`]. Passing `None` here
    /// therefore yields a session with no principal even when
    /// `SOLO_MCP_PRINCIPAL_TOKEN` is set.
    pub fn new_for_tenant_with_principal(
        registry: Arc<TenantRegistry>,
        tenant: Arc<TenantHandle>,
        user_aliases: Vec<String>,
        audit_principal: Option<String>,
    ) -> Self {
        let state = Inner {
            registry,
            tenant,
            user_aliases,
            audit_principal,
        };
        Self {
            inner: Arc::new(state),
        }
    }
}

/// Convenience: serve `server` over this process's stdin/stdout and wait
/// for the session to finish.
///
/// Returns when stdin closes (parent disconnect) or the runtime exits.
///
/// # Errors
///
/// Propagates a failure to start the service, or an error reported
/// while waiting for it to terminate.
pub async fn serve_stdio(server: SoloMcpServer) -> anyhow::Result<()> {
    // `stdio()` yields the (stdin, stdout) pair rmcp uses as a transport.
    let transport = rmcp::transport::io::stdio();
    server.serve(transport).await?.waiting().await?;
    Ok(())
}

// ---------------------------------------------------------------------------
// Tool argument schemas
// ---------------------------------------------------------------------------

/// Arguments for the `memory_remember` tool (store one episode).
///
/// Only `content` is required on the wire; the three optional fields
/// deserialize to `None` when the client omits them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RememberArgs {
    /// Episode text to store.
    pub content: String,
    /// Optional source-type tag for the episode.
    // NOTE(review): accepted values are decided by the handler, which is
    // not visible in this part of the file.
    #[serde(default)]
    pub source_type: Option<String>,
    /// Optional identifier of the source the episode came from.
    #[serde(default)]
    pub source_id: Option<String>,
    /// v0.9.2 — optional salience in [0.0, 1.0]. `None` → 0.5 (preserves
    /// pre-v0.9.2 behaviour). Out-of-range values are rejected by
    /// the module-level `validate_salience` helper before reaching the
    /// writer.
    #[serde(default)]
    pub salience: Option<f32>,
}

/// v0.9.2 — one item in a `memory_remember_batch` request.
///
/// Mirrors [`RememberArgs`] field-for-field minus the wrapper-tool
/// invariant: callers pass an array of these inside [`RememberBatchArgs`].
/// All items in a batch are persisted in a single `BEGIN IMMEDIATE`
/// transaction (per dev-log 0120 §3 Decision A) so partial-failure
/// scenarios are impossible from the client's perspective — either
/// every item lands or none do.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RememberItem {
    /// Episode text to store (required).
    pub content: String,
    /// Optional source-type tag; `None` when omitted.
    #[serde(default)]
    pub source_type: Option<String>,
    /// Optional source identifier; `None` when omitted.
    #[serde(default)]
    pub source_id: Option<String>,
    /// Optional salience in [0.0, 1.0]; `None` → 0.5. See
    /// [`RememberArgs::salience`].
    #[serde(default)]
    pub salience: Option<f32>,
}

/// v0.9.2 — args for the new `memory_remember_batch` MCP tool.
///
/// Wraps `Vec<RememberItem>`. The handler validates `items.is_empty()`
/// and `items.len() > MAX_REMEMBER_BATCH_SIZE` before any embedding
/// work; per-item content/salience is validated immediately afterwards.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RememberBatchArgs {
    /// Episodes to store, in request order. Required on the wire (no
    /// serde default), so a request without `items` fails to
    /// deserialize rather than becoming an empty batch.
    pub items: Vec<RememberItem>,
}

/// Check an optional salience value shared by `memory_remember` and
/// `memory_remember_batch`, so both tools reject bad input the same way.
///
/// `None` is always accepted. `Some(s)` is accepted only when `s` is
/// finite and lies inside `[0.0, 1.0]` (bounds included); NaN and the
/// infinities are rejected.
///
/// # Errors
///
/// Returns an `invalid_params` [`McpError`] whose message echoes the
/// offending value.
fn validate_salience(salience: Option<f32>) -> std::result::Result<(), McpError> {
    match salience {
        Some(value) if !(value.is_finite() && (0.0..=1.0).contains(&value)) => {
            let message = format!("salience must be in [0.0, 1.0]; got {value}");
            Err(McpError::invalid_params(message, None))
        }
        _ => Ok(()),
    }
}

/// Arguments for the `memory_recall` tool (vector search over episodes).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallArgs {
    /// Search text (required).
    pub query: String,
    /// Maximum number of matches to return; `default_limit()` (5) when
    /// omitted.
    #[serde(default = "default_limit")]
    pub limit: usize,
}

/// Arguments for the `memory_context` tool (combined recall + themes +
/// facts + contradictions bundle).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryContextArgs {
    /// Search text (required).
    pub query: String,
    /// Optional subject; `None` when omitted.
    // NOTE(review): presumably selects the subject for the facts part
    // of the bundle — confirm against the handler.
    #[serde(default)]
    pub subject: Option<String>,
    /// Optional time window in days; `None` when omitted.
    #[serde(default)]
    pub window_days: Option<i64>,
    /// Result cap; `default_limit()` (5) when omitted.
    #[serde(default = "default_limit")]
    pub limit: usize,
}

/// Serde fallback for the `limit` field of the tool-argument structs
/// that name it in `#[serde(default = "default_limit")]`: five results.
fn default_limit() -> usize {
    const DEFAULT_RESULT_LIMIT: usize = 5;
    DEFAULT_RESULT_LIMIT
}

/// Arguments for the `memory_forget` tool (soft-delete one episode).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForgetArgs {
    /// Id of the episode to forget, as a string (required).
    pub memory_id: String,
    /// Why the episode is being forgotten; `default_forget_reason()`
    /// (`"user-initiated via MCP"`) when omitted.
    #[serde(default = "default_forget_reason")]
    pub reason: String,
}

/// Serde fallback for [`ForgetArgs::reason`] when the client gives no
/// reason of its own.
fn default_forget_reason() -> String {
    String::from("user-initiated via MCP")
}

/// Arguments for the `memory_inspect` tool (return one episode record).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InspectArgs {
    /// Id of the episode to inspect, as a string (required).
    pub memory_id: String,
}

/// Arguments for the `memory_update` tool (correct an active episode's
/// content). Both fields are required.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UpdateArgs {
    /// Id of the episode to update, as a string.
    pub memory_id: String,
    /// Replacement content for the episode.
    pub content: String,
}

// Path 1 derived-layer tools (v0.4.0+) — query the Steward's outputs.
// `solo_query::derived` is the single source of truth; these handlers
// just translate JSON args to function args and serialise the result
// vec to JSON for the MCP wire.

/// Arguments for the `memory_themes` tool (list cluster themes). Every
/// field is optional on the wire.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThemesArgs {
    /// Optional time window in days; `None` = unfiltered, return up
    /// to `limit` most-recent themes across all time. `Some(7)` =
    /// "themes from the last week".
    #[serde(default)]
    pub window_days: Option<i64>,
    /// Maximum number of themes to return; `default_limit()` (5) when
    /// omitted.
    #[serde(default = "default_limit")]
    pub limit: usize,
}

/// Arguments for the `memory_facts_about` tool (query the
/// subject-predicate-object fact graph). Only `subject` is required.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FactsAboutArgs {
    /// Subject id to query — required (predicate-only scans
    /// intentionally not supported).
    pub subject: String,
    /// Optional predicate filter; `None` when omitted.
    #[serde(default)]
    pub predicate: Option<String>,
    /// Optional lower time bound; `None` when omitted.
    // NOTE(review): the `_ms` suffix suggests epoch milliseconds —
    // confirm against `solo_query`.
    #[serde(default)]
    pub since_ms: Option<i64>,
    /// Optional upper time bound; `None` when omitted. Same unit caveat
    /// as `since_ms`.
    #[serde(default)]
    pub until_ms: Option<i64>,
    /// v0.5.1 Priority 8 — widen the query to also match rows where
    /// `subject` appears as the object (e.g. surface "Sam pushes back
    /// on PRs about Maya" under `facts_about(subject="maya")`).
    /// Default `false` preserves v0.5.0 behaviour.
    #[serde(default)]
    pub include_as_object: bool,
    /// Maximum number of facts to return; `default_limit()` (5) when
    /// omitted.
    #[serde(default = "default_limit")]
    pub limit: usize,
}

/// Arguments for the `memory_entities` tool (discover entity ids from
/// the structured-fact graph).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntitiesArgs {
    /// Text to look entities up by (required).
    pub query: String,
    /// Maximum number of entities to return; `default_limit()` (5) when
    /// omitted.
    #[serde(default = "default_limit")]
    pub limit: usize,
}

/// Arguments for the `memory_contradictions` tool (list disagreements
/// flagged during consolidation).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContradictionsArgs {
    /// Maximum number of contradictions to return; `default_limit()`
    /// (5) when omitted.
    #[serde(default = "default_limit")]
    pub limit: usize,
}

/// Serde fallback for [`ContradictionResolveArgs::status`]: a resolve
/// call that names no status marks the contradiction `"resolved"`.
fn default_contradiction_status() -> String {
    String::from("resolved")
}

/// Arguments for the `memory_contradiction_resolve` tool (mark a
/// contradiction resolved, unresolved, or reopened).
///
/// `a_id`, `b_id` and `kind` are required; together they name the
/// contradiction to update.
// NOTE(review): the id format and the accepted `kind` / `status`
// strings are validated by the handler, which is not visible here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContradictionResolveArgs {
    /// Id of the first side of the contradiction.
    pub a_id: String,
    /// Id of the second side of the contradiction.
    pub b_id: String,
    /// Kind of the contradiction, as a string.
    pub kind: String,
    /// Status to set; `default_contradiction_status()` (`"resolved"`)
    /// when omitted.
    #[serde(default = "default_contradiction_status")]
    pub status: String,
    /// Optional free-text note about the resolution; `None` when
    /// omitted.
    #[serde(default)]
    pub resolution_note: Option<String>,
    /// Optional id of the triple considered the winner; `None` when
    /// omitted.
    #[serde(default)]
    pub winning_triple_id: Option<String>,
}

/// Args for `memory_inspect_cluster` (v0.5.0 Priority 3). `cluster_id`
/// is required; `full_content` is opt-in for the rare power-user case
/// where 200-char-per-episode truncation is too aggressive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InspectClusterArgs {
    /// Id of the cluster to drill into, as a string (required).
    pub cluster_id: String,
    /// If `true`, episode `content` fields are returned verbatim. If
    /// `false` or omitted (the default), each episode's content is
    /// truncated to `solo_query::EPISODE_TRUNCATE_CHARS` chars with a
    /// trailing `…`.
    #[serde(default)]
    pub full_content: bool,
}

// Document tools (v0.7.0+). Five args structs paired with five handlers.
// Wire shapes per `docs/dev-log/0083-v0.7.0-implementation-plan.md` §2 P5.

/// Arguments for the `memory_ingest_document` tool (read a file from
/// disk, chunk it, embed it, store it).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IngestDocumentArgs {
    /// Server-side filesystem path to the file to ingest. Must be
    /// readable by the Solo process. The writer parses the file by
    /// extension, splits it into ~500-token chunks, embeds each, and
    /// stores them under `documents` + `document_chunks`.
    ///
    /// The path is resolved on the machine running Solo, not on the
    /// MCP client's machine.
    pub path: String,
}

/// Arguments for the `memory_search_docs` tool (vector search
/// restricted to document chunks).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchDocsArgs {
    /// Search text (required).
    pub query: String,
    /// Maximum number of chunks to return;
    /// `default_search_docs_limit()` (5) when omitted.
    #[serde(default = "default_search_docs_limit")]
    pub limit: usize,
}

/// Serde fallback for [`SearchDocsArgs::limit`]: five chunks per search.
fn default_search_docs_limit() -> usize {
    const DEFAULT_DOC_SEARCH_LIMIT: usize = 5;
    DEFAULT_DOC_SEARCH_LIMIT
}

/// Arguments for the `memory_inspect_document` tool (one document's
/// metadata plus a preview of its chunks).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InspectDocumentArgs {
    /// Id of the document to inspect, as a string (required).
    pub doc_id: String,
}

/// Arguments for the `memory_list_documents` tool (paginate over
/// ingested documents). Every field is optional on the wire.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ListDocumentsArgs {
    /// Page size; `default_list_documents_limit()` (20) when omitted.
    #[serde(default = "default_list_documents_limit")]
    pub limit: usize,
    /// Number of documents to skip before the page starts; 0 when
    /// omitted.
    #[serde(default)]
    pub offset: usize,
    /// If `true`, also include documents the user has forgotten. Default
    /// `false` matches the agent-UX expectation that recall + listing
    /// ignore soft-deleted rows.
    #[serde(default)]
    pub include_forgotten: bool,
}

/// Serde fallback for [`ListDocumentsArgs::limit`]: twenty documents per
/// page.
fn default_list_documents_limit() -> usize {
    const DEFAULT_DOCUMENT_PAGE_SIZE: usize = 20;
    DEFAULT_DOCUMENT_PAGE_SIZE
}

/// Arguments for the `memory_forget_document` tool (soft-delete one
/// document).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForgetDocumentArgs {
    /// Id of the document to forget, as a string (required).
    pub doc_id: String,
}

// ---------------------------------------------------------------------------
// ServerHandler implementation
// ---------------------------------------------------------------------------

impl ServerHandler for SoloMcpServer {
    fn get_info(&self) -> ServerInfo {
        // rmcp 1.x: ServerInfo is non-exhaustive AND lives in another crate,
        // so neither struct-literal nor functional-update syntax (..) is
        // allowed from outside. Build via mut on a Default::default().
        let capabilities = ServerCapabilities::builder().enable_tools().build();
        let mut info = ServerInfo::default();
        info.protocol_version = ProtocolVersion::default();
        info.capabilities = capabilities;
        // v0.9.1 P1 Fix 1 — `Implementation::from_build_env()` reads
        // `CARGO_PKG_NAME` + `CARGO_PKG_VERSION` from rmcp's OWN build
        // environment (the helper lives in rmcp, so the proc-macro
        // expansion captures rmcp's manifest, not ours). On v0.9.0 every
        // Solo MCP daemon self-identified as `{name: "rmcp", version: "1.7.0"}`.
        // Pinned by `tests::server_info_identity_is_solo_not_rmcp_or_solo_api`.
        // The literal `"solo"` (not `env!("CARGO_PKG_NAME")`) is deliberate:
        // this crate is `solo-api`, but the operator-facing identity is
        // the binary name `solo`.
        info.server_info =
            Implementation::new("solo".to_string(), env!("CARGO_PKG_VERSION").to_string());
        info.instructions = Some(
            "Solo gives you persistent memory across conversations \
                 with this user — what they've told you before, the \
                 people and projects in their life, and where their \
                 stated beliefs have shifted, plus a library of \
                 documents the user has ingested (notes, runbooks, \
                 PDFs). Reach for these tools whenever the user \
                 references something from earlier (\"like I \
                 mentioned\", \"the project I'm working on\", \"my \
                 friend Alex\", \"the notes I uploaded last week\") \
                 or asks a question that hinges on personal context \
                 or document content you don't have in the current \
                 chat. \
                 \n\nBest first call for agent work: memory_context \
                 (one bounded bundle containing recall, themes, \
                 optional facts, and contradictions). Use the \
                 narrower tools below when you need more detail or \
                 a specific operation. \
                 \n\nTools to write or look up specific moments: \
                 memory_remember (save something worth keeping), \
                 memory_update (correct one active saved item), \
                 memory_recall (search past conversations by topic), \
                 memory_inspect (show one saved item by id), \
                 memory_forget (delete one saved item). \
                 \n\nTools for the bigger picture (populated as the \
                 user uses Solo over time): memory_themes (recent \
                 topics they've been thinking about), \
                 memory_facts_about (what you know about a person, \
                 project, or place — \"what do you know about \
                 Alex?\"), memory_entities (discover graph entity \
                 ids by name), memory_contradictions (places where the \
                 user has said two things that disagree — surface \
                 these before answering), memory_contradiction_resolve \
                 (mark a contradiction resolved or reopened), \
                 memory_inspect_cluster \
                 (the raw conversations behind one summary). \
                 \n\nTools for the user's documents: \
                 memory_ingest_document (read a file from disk and \
                 add it to Solo's library), memory_search_docs \
                 (search across ingested documents by topic — use \
                 when the user asks about something they wrote down \
                 or saved as a file), memory_inspect_document (show \
                 one document's metadata plus a preview of its \
                 chunks), memory_list_documents (browse documents \
                 by recency), memory_forget_document (drop a \
                 document from the library)."
                .into(),
        );
        info
    }

    /// v0.9.0 P2: override `initialize` so we can:
    ///
    ///   1. Cache the client's `InitializeRequestParams` on the peer
    ///      (delegates to rmcp's default for this).
    ///   2. If the tenant's `[llm] mode = "mcp_sampling"`:
    ///      a. Refuse to initialize when the peer didn't advertise the
    ///         `sampling` capability — surfaces the BLOCKER 2-locked
    ///         error message so the user sees commented-out
    ///         alternative TOML blocks.
    ///      b. Otherwise build a `SamplingLlmClient`-backed Steward and
    ///         write it into `tenant.steward_slot()` so the writer
    ///         actor's next consolidate-tick reads a populated slot.
    ///   3. For any other `[llm]` mode, return the configured tools
    ///      surface unchanged (the slot was eagerly populated at
    ///      registry-open time by the static StewardFactory).
    async fn initialize(
        &self,
        request: InitializeRequestParams,
        context: RequestContext<RoleServer>,
    ) -> std::result::Result<InitializeResult, McpError> {
        // Defer to rmcp's default for peer-info caching (matches the
        // `if peer_info().is_none()` shape).
        if context.peer.peer_info().is_none() {
            context.peer.set_peer_info(request.clone());
        }

        let llm_settings = self.inner.tenant.config().llm.as_ref().cloned();
        let peer_sampling_supported = request.capabilities.sampling.is_some();
        match initialize_decision(&llm_settings, peer_sampling_supported) {
            InitializeDecision::Allow => {}
            InitializeDecision::PopulateSamplingSteward => {
                // Build the sampling-backed Steward against the live
                // peer + the per-tenant write handle, then write it
                // into the slot.
                self.populate_sampling_steward(&context).await;
            }
            InitializeDecision::RejectMissingSamplingCapability => {
                return Err(McpError::invalid_request(
                    sampling_capability_missing_error_message(),
                    None,
                ));
            }
        }

        Ok(self.get_info())
    }

    async fn list_tools(
        &self,
        _request: Option<PaginatedRequestParam>,
        _context: RequestContext<RoleServer>,
    ) -> std::result::Result<ListToolsResult, McpError> {
        Ok(ListToolsResult {
            tools: build_tools(),
            next_cursor: None,
            ..Default::default()
        })
    }

    async fn call_tool(
        &self,
        request: CallToolRequestParam,
        _context: RequestContext<RoleServer>,
    ) -> std::result::Result<CallToolResult, McpError> {
        let CallToolRequestParam {
            name, arguments, ..
        } = request;
        let args_value = serde_json::Value::Object(arguments.unwrap_or_default());
        // v0.11.0 P3: stdio transport has no per-session broadcast
        // channel to publish progress events through (one process =
        // one tenant = one implicit "session" for the subprocess's
        // lifetime). Pass `None` — handlers see it and skip the
        // emission code paths silently.
        self.dispatch_tool(&name, args_value, None).await
    }
}

impl SoloMcpServer {
    /// v0.9.0 P2: construct the sampling-backed Steward for the current
    /// MCP session and install it in the tenant's `steward_slot`.
    /// [`Self::initialize`] calls this on its
    /// `InitializeDecision::PopulateSamplingSteward` branch.
    ///
    /// Things to know:
    ///
    ///   * Steward settings are resolved with
    ///     `StewardConfig::from_settings_then_env()`, fed from the
    ///     tenant's `SoloConfig.steward` block (the v0.11.1 TOML
    ///     surface). If that call fails, a warning is logged and
    ///     `StewardConfig::default()` is used instead — a bad config
    ///     must not stop an MCP session from initialising.
    ///
    ///   * The slot is overwritten unconditionally, so the most recent
    ///     MCP session always wins. Requests still outstanding on an
    ///     earlier session's client are expected to fail once that peer
    ///     drops (that happens outside this function).
    ///
    ///   * The audit principal handed to the Steward is the one cached
    ///     on this server (`self.inner.audit_principal`).
    async fn populate_sampling_steward(&self, context: &RequestContext<RoleServer>) {
        let tenant_cfg = self.inner.tenant.config();

        // v0.11.1: `[steward]` overrides come from the tenant's
        // already-parsed config. Resolution is best-effort: on error we
        // log and continue with defaults rather than block session init.
        let steward_config = match solo_steward::StewardConfig::from_settings_then_env(
            tenant_cfg.steward.cluster_min_size,
            tenant_cfg.steward.cluster_cosine_threshold,
        ) {
            Ok(resolved) => resolved,
            Err(err) => {
                tracing::warn!(
                    error = %err,
                    "v0.11.1: StewardConfig::from_settings_then_env failed at MCP \
                     initialize; falling back to defaults"
                );
                solo_steward::StewardConfig::default()
            }
        };

        // v0.9.0 P5 (M3 wiring): the `[sampling]` block is taken from
        // the same parsed config and passed on to
        // `build_sampling_steward`; a copy is kept for the log line below.
        let sampling_config = tenant_cfg.sampling.clone();

        let steward = crate::llm::build_sampling_steward(
            context.peer.clone(),
            self.inner.tenant.write().clone(),
            self.inner.audit_principal.clone(),
            steward_config,
            sampling_config.clone(),
        );

        // Replace whatever Steward an earlier session left in the slot.
        let slot = self.inner.tenant.steward_slot();
        let mut installed = slot.write().await;
        *installed = Some(steward);

        tracing::info!(
            tenant = %self.inner.tenant.tenant_id(),
            coalesce_window_ms = sampling_config.coalesce_window_ms,
            coalesce_max_requests = sampling_config.coalesce_max_requests,
            "v0.9.0 P5: MCP-sampling Steward attached to tenant.steward_slot \
             (PeerSamplingClient → SamplingCoordinator → SamplingLlmClient)"
        );
    }

    /// Route one tool invocation to its handler, selected by tool name.
    ///
    /// Shared by `call_tool` (the ServerHandler trait method, behind the
    /// rmcp protocol layer) and by in-process tests that don't want to
    /// spin up a full transport pair. It takes plain values instead of a
    /// `RequestContext` (which requires a `Peer` not constructible
    /// outside rmcp internals).
    ///
    /// v0.11.0 P3: `progress` is `Some` only when the HTTP transport
    /// dispatched the request AND the client opted in via
    /// `_meta.progressToken`. Only the three long-running handlers
    /// (`memory_ingest_document`, `memory_search_docs`,
    /// `memory_remember_batch`) are handed the reporter; every other
    /// handler never sees it (backward compat with stdio and with HTTP
    /// clients that did not opt in).
    ///
    /// # Errors
    ///
    /// Returns `McpError::invalid_params` when `name` is not a known
    /// tool or when `args_value` does not deserialize into that tool's
    /// argument struct; otherwise returns whatever the handler returns.
    pub async fn dispatch_tool(
        &self,
        name: &str,
        args_value: serde_json::Value,
        progress: Option<crate::mcp_progress::ProgressReporter>,
    ) -> std::result::Result<CallToolResult, McpError> {
        let raw = &args_value;
        match name {
            // Episode tools.
            "memory_remember" => {
                let req = parse_args::<RememberArgs>(raw)?;
                self.handle_remember(req).await
            }
            "memory_remember_batch" => {
                let req = parse_args::<RememberBatchArgs>(raw)?;
                self.handle_remember_batch(req, progress).await
            }
            "memory_recall" => {
                let req = parse_args::<RecallArgs>(raw)?;
                self.handle_recall(req).await
            }
            "memory_context" => {
                let req = parse_args::<MemoryContextArgs>(raw)?;
                self.handle_memory_context(req).await
            }
            "memory_forget" => {
                let req = parse_args::<ForgetArgs>(raw)?;
                self.handle_forget(req).await
            }
            "memory_inspect" => {
                let req = parse_args::<InspectArgs>(raw)?;
                self.handle_inspect(req).await
            }
            "memory_update" => {
                let req = parse_args::<UpdateArgs>(raw)?;
                self.handle_update(req).await
            }
            // Derived-layer tools.
            "memory_themes" => {
                let req = parse_args::<ThemesArgs>(raw)?;
                self.handle_themes(req).await
            }
            "memory_facts_about" => {
                let req = parse_args::<FactsAboutArgs>(raw)?;
                self.handle_facts_about(req).await
            }
            "memory_entities" => {
                let req = parse_args::<EntitiesArgs>(raw)?;
                self.handle_entities(req).await
            }
            "memory_contradictions" => {
                let req = parse_args::<ContradictionsArgs>(raw)?;
                self.handle_contradictions(req).await
            }
            "memory_contradiction_resolve" => {
                let req = parse_args::<ContradictionResolveArgs>(raw)?;
                self.handle_contradiction_resolve(req).await
            }
            "memory_inspect_cluster" => {
                let req = parse_args::<InspectClusterArgs>(raw)?;
                self.handle_inspect_cluster(req).await
            }
            // Document tools.
            "memory_ingest_document" => {
                let req = parse_args::<IngestDocumentArgs>(raw)?;
                self.handle_ingest_document(req, progress).await
            }
            "memory_search_docs" => {
                let req = parse_args::<SearchDocsArgs>(raw)?;
                self.handle_search_docs(req, progress).await
            }
            "memory_inspect_document" => {
                let req = parse_args::<InspectDocumentArgs>(raw)?;
                self.handle_inspect_document(req).await
            }
            "memory_list_documents" => {
                let req = parse_args::<ListDocumentsArgs>(raw)?;
                self.handle_list_documents(req).await
            }
            "memory_forget_document" => {
                let req = parse_args::<ForgetDocumentArgs>(raw)?;
                self.handle_forget_document(req).await
            }
            unknown => Err(McpError::invalid_params(
                format!("unknown tool `{unknown}`"),
                None,
            )),
        }
    }

    /// List the tools this server exposes. Mirrors `ServerHandler::list_tools`
    /// without requiring a RequestContext.
    ///
    /// Every call builds and returns a fresh `Vec<Tool>` via
    /// `build_tools()`; nothing is cached here.
    pub fn dispatch_list_tools(&self) -> Vec<Tool> {
        build_tools()
    }
}

/// Deserialize a tool call's JSON arguments into the handler's typed
/// argument struct `T`.
///
/// Deserialization runs directly against the borrowed value
/// (`&serde_json::Value` is itself a `serde::Deserializer`), so the
/// argument tree is not deep-cloned first. That matters for large
/// payloads such as a full `memory_remember_batch` request.
///
/// # Errors
///
/// Returns `McpError::invalid_params` with the message
/// `invalid tool arguments: <serde error>` when `v` does not match the
/// shape `T` expects.
fn parse_args<T: serde::de::DeserializeOwned>(
    v: &serde_json::Value,
) -> std::result::Result<T, McpError> {
    // Fully-qualified call so this does not depend on the `Deserialize`
    // trait being imported at file scope.
    <T as serde::Deserialize>::deserialize(v)
        .map_err(|e| McpError::invalid_params(format!("invalid tool arguments: {e}"), None))
}

fn solo_to_mcp(e: solo_core::Error) -> McpError {
    use solo_core::Error;
    match e {
        Error::NotFound(msg) => McpError::invalid_params(msg, None),
        Error::InvalidInput(msg) => McpError::invalid_params(msg, None),
        Error::Conflict(msg) => McpError::invalid_params(msg, None),
        other => McpError::internal_error(other.to_string(), None),
    }
}

// ---------------------------------------------------------------------------
// Tool definitions (JSON Schema)
// ---------------------------------------------------------------------------

/// Build the full tool catalogue advertised to MCP clients.
///
/// Each entry is a `Tool::new(name, description, input_schema)` record:
/// the description is the prose a client model reads to decide when to
/// call the tool, and the schema is a JSON Schema object describing the
/// tool's arguments. The result backs both `ServerHandler::list_tools`
/// and `dispatch_list_tools`.
///
/// Keep three things in sync when adding or renaming a tool: the entry
/// here, the matching arm in `dispatch_tool`, and the name list in
/// `tool_names()` (which is kept in this same order).
///
/// Every string and schema value below is client-visible, so edits to
/// them change the wire contract.
fn build_tools() -> Vec<Tool> {
    vec![
        Tool::new(
            "memory_remember",
            "Save something the user has told you — a fact, a \
             preference, a name, a date, a context — so you can pick \
             it up next conversation. Use whenever the user mentions \
             something they'd reasonably expect you to recall later \
             (\"I just started at Quotient\", \"my partner is Maya\"). \
             Returns the saved item's id.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "content": {
                        "type": "string",
                        "description": "The text to remember.",
                    },
                    "source_type": {
                        "type": "string",
                        "description": "Optional source-type tag (default: \"user_message\"). See docs/mcp/source-types.md for convention values.",
                    },
                    "source_id": {
                        "type": "string",
                        "description": "Optional upstream id for traceability.",
                    },
                    "salience": {
                        "type": "number",
                        "description": "Optional salience in [0.0, 1.0]; defaults to 0.5. Higher values bias toward recall ranking + retention. v0.9.2+.",
                        "minimum": 0.0,
                        "maximum": 1.0,
                    },
                },
                "required": ["content"],
            })),
        ),
        // v0.9.2 — atomic batched-remember for agentic clients. Wraps
        // every item in one BEGIN IMMEDIATE tx so a single
        // `memory_remember_batch` call either persists all N items or
        // none. Designed for the solo-jarvis turn-flush pattern (per
        // dev-log 0120 §1).
        Tool::new(
            "memory_remember_batch",
            "Save several items atomically in one transaction — either \
             every item lands or none does. Use this when you have a \
             collection of related episodes from one logical step (a \
             conversation turn, a tool-output bundle, an ingest batch) \
             and partial success would leave the user's memory in a \
             confusing half-state. Each item carries the same fields as \
             memory_remember (content + optional source_type, source_id, \
             salience). Returns an ordered array of memory_ids matching \
             the input items. v0.9.2+.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "items": {
                        "type": "array",
                        "description": format!(
                            "Items to remember atomically. Max {} per call.",
                            solo_storage::MAX_REMEMBER_BATCH_SIZE,
                        ),
                        "minItems": 1,
                        // SOURCE OF TRUTH: solo_storage::MAX_REMEMBER_BATCH_SIZE.
                        // Both the numeric `maxItems` and the human-readable
                        // `description` above interpolate from this constant
                        // so they can never drift. Pinned by
                        // `remember_batch_maxitems_matches_max_batch_size`
                        // in the test module.
                        "maxItems": solo_storage::MAX_REMEMBER_BATCH_SIZE,
                        "items": {
                            "type": "object",
                            "properties": {
                                "content": {
                                    "type": "string",
                                    "description": "The text to remember.",
                                },
                                "source_type": {
                                    "type": "string",
                                    "description": "Optional source-type tag (default: \"user_message\"). See docs/mcp/source-types.md.",
                                },
                                "source_id": {
                                    "type": "string",
                                    "description": "Optional upstream id for traceability.",
                                },
                                "salience": {
                                    "type": "number",
                                    "description": "Optional salience in [0.0, 1.0]; defaults to 0.5.",
                                    "minimum": 0.0,
                                    "maximum": 1.0,
                                },
                            },
                            "required": ["content"],
                        },
                    },
                },
                "required": ["items"],
            })),
        ),
        Tool::new(
            "memory_recall",
            "Search past conversations with this user by topic or \
             phrase. Returns up to `limit` of the closest matches, \
             best match first. Use when the user references \
             something they said before (\"that book I told you \
             about\", \"the bug we were debugging last week\"). \
             Skips items the user has deleted.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The query text.",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results (default 5).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                },
                "required": ["query"],
            })),
        ),
        Tool::new(
            "memory_context",
            "Build a compact working-memory bundle for an agent turn. \
             Use this near the start of a substantial answer or task \
             when remembered context may matter. It combines raw \
             episodic recall, recent themes, optional structured facts \
             about `subject`, and known contradictions so clients can \
             ground answers without making four separate calls.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Natural-language query for episodic recall.",
                    },
                    "subject": {
                        "type": "string",
                        "description": "Optional subject for structured facts. When present, facts also match object-position references.",
                    },
                    "window_days": {
                        "type": "integer",
                        "description": "Optional recency window in days for themes. Omit for unfiltered.",
                        "minimum": 1,
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Per-section maximum results (default 5).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                },
                "required": ["query"],
            })),
        ),
        Tool::new(
            "memory_forget",
            "Delete one saved item by id. Use when the user asks you \
             to forget something specific (\"forget that I said \
             X\"). The item stops appearing in future recalls. \
             Reversible only via backups.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "memory_id": {
                        "type": "string",
                        "description": "MemoryId to forget (UUID v7).",
                    },
                    "reason": {
                        "type": "string",
                        "description": "Optional free-form reason (logged, not yet persisted).",
                    },
                },
                "required": ["memory_id"],
            })),
        ),
        Tool::new(
            "memory_inspect",
            "Show the full record for one saved item — when it was \
             saved, where it came from, and the full text. Use after \
             memory_recall when you want the complete content of a \
             specific hit (recall results may be truncated).",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "memory_id": {
                        "type": "string",
                        "description": "MemoryId to inspect (UUID v7).",
                    },
                },
                "required": ["memory_id"],
            })),
        ),
        Tool::new(
            "memory_update",
            "Correct one active saved memory and refresh its embedding \
             and search index entry. Use when the user says a remembered \
             episode is wrong or outdated and provides the corrected \
             wording. Returns the updated memory id, rowid, content, and \
             timestamp.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "memory_id": {
                        "type": "string",
                        "description": "MemoryId to update (UUID v7).",
                    },
                    "content": {
                        "type": "string",
                        "description": "Replacement content for the active memory.",
                        "minLength": 1,
                    },
                },
                "required": ["memory_id", "content"],
            })),
        ),
        // Path 1 derived-layer tools (v0.4.0+) — query the Steward's
        // outputs. These are populated by `solo consolidate` and were
        // previously unreadable except via direct SQL.
        Tool::new(
            "memory_themes",
            "Recent topics the user has been thinking about. Use to \
             orient yourself at the start of a conversation, or when \
             the user asks \"what have I been up to\" / \"what was I \
             working on last week\". Pass `window_days` to scope \
             (e.g. 7 for last week); omit for all-time.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "window_days": {
                        "type": "integer",
                        "description": "Optional time window in days. Omit for unfiltered.",
                        "minimum": 1,
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results (default 5).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                },
                "required": [],
            })),
        ),
        Tool::new(
            "memory_facts_about",
            "Look up what you remember about a person, project, or \
             topic — names, dates, preferences, relationships. Use \
             when the user asks \"what do you know about Alex?\", \
             \"when did I start at Quotient?\", \"who is Maya?\", or \
             whenever you need grounded facts about someone or \
             something before answering. Subject is required (the \
             person/place/thing you're asking about); narrow further \
             with `predicate` (\"works_at\", \"lives_in\") or a date \
             range. Set `include_as_object=true` to also surface \
             facts where the subject appears on the receiving side of \
             a relationship (e.g. \"Sam pushes back on PRs about \
             Maya\" surfaces under facts_about(subject=\"Maya\", \
             include_as_object=true)). (Backed by \
             subject-predicate-object triples distilled from past \
             conversations.) Clients should set a 30s timeout on this \
             call; if exceeded, retry once or fall back to \
             `memory_recall`.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "subject": {
                        "type": "string",
                        "description": "Subject id to query (e.g. 'Sam').",
                    },
                    "predicate": {
                        "type": "string",
                        "description": "Optional predicate filter (e.g. 'works_at').",
                    },
                    "since_ms": {
                        "type": "integer",
                        "description": "Optional valid_from_ms lower bound (epoch ms).",
                    },
                    "until_ms": {
                        "type": "integer",
                        "description": "Optional valid_to_ms upper bound (epoch ms). NULL upper bounds (still-valid facts) pass through.",
                    },
                    "include_as_object": {
                        "type": "boolean",
                        "description": "If true, also match facts where `subject` appears as the object (e.g. 'Sam pushes back on PRs about Maya' surfaces under subject='Maya'). Default false.",
                        "default": false,
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results (default 5).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                },
                "required": ["subject"],
            })),
        ),
        Tool::new(
            "memory_entities",
            "Discover entity ids from the structured-fact graph. Use \
             before memory_facts_about when you are not sure how a \
             person, project, or topic is keyed in memory, or when the \
             user gives a partial name. Returns entity ids with fact \
             counts and common predicates.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Partial or exact entity id to search for.",
                        "minLength": 1,
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results (default 5).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                },
                "required": ["query"],
            })),
        ),
        Tool::new(
            "memory_contradictions",
            "Find places where the user's stated beliefs or facts \
             disagree across conversations — flag disagreements \
             before answering. Use whenever you're about to rely on \
             a remembered fact that could have changed (jobs, \
             relationships, preferences, opinions); a disagreement \
             here means the user has told you both X and not-X over \
             time and you should ask which is current instead of \
             guessing. Each result shows both conflicting statements \
             with the topic.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results (default 5).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                },
                "required": [],
            })),
        ),
        Tool::new(
            "memory_contradiction_resolve",
            "Mark one flagged contradiction as resolved, unresolved, \
             or reopened. Use after the user clarifies which side is \
             current. Pass the a_id, b_id, and kind from \
             memory_contradictions; status defaults to resolved.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "a_id": {
                        "type": "string",
                        "description": "First contradiction id from memory_contradictions.",
                    },
                    "b_id": {
                        "type": "string",
                        "description": "Second contradiction id from memory_contradictions.",
                    },
                    "kind": {
                        "type": "string",
                        "description": "Contradiction kind from memory_contradictions.",
                    },
                    "status": {
                        "type": "string",
                        "enum": ["unresolved", "resolved", "reopened"],
                        "default": "resolved",
                        "description": "New lifecycle status.",
                    },
                    "resolution_note": {
                        "type": "string",
                        "description": "Optional human-readable clarification.",
                    },
                    "winning_triple_id": {
                        "type": "string",
                        "description": "Optional triple id to treat as the current/correct side.",
                    },
                },
                "required": ["a_id", "b_id", "kind"],
            })),
        ),
        Tool::new(
            "memory_inspect_cluster",
            "Show the raw conversations behind one summary. Returns \
             the one-line topic (the LLM-generated summary) and the \
             source conversations the topic was built from. Use \
             after memory_themes when the user asks \"show me the \
             raw context behind this\" or \"why does Solo think \
             that about cluster Y\". Source items are truncated to \
             200 chars unless `full_content` is set.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "cluster_id": {
                        "type": "string",
                        "description": "Cluster id to inspect (from memory_themes hits).",
                    },
                    "full_content": {
                        "type": "boolean",
                        "description": "If true, episode content is returned verbatim. Default false (truncate to 200 chars + ellipsis).",
                    },
                },
                "required": ["cluster_id"],
            })),
        ),
        // Document tools (v0.7.0+). RAG over user-supplied files —
        // markdown notes, PDFs, runbooks, code, etc. Same vector space
        // as episodes; same embedder; same HNSW index.
        Tool::new(
            "memory_ingest_document",
            "Read a file from disk and add it to the user's document \
             library so it becomes searchable alongside past \
             conversations. Use when the user asks you to remember a \
             whole file (\"add my notes/runbook.md\", \"ingest this \
             PDF\"). The file is split into ~500-token chunks and \
             each chunk is embedded; chunks then surface through \
             memory_search_docs. Returns the new document id, chunk \
             count, and a `deduped` flag (true if the same content \
             was already ingested under another id).",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "path": {
                        "type": "string",
                        "description": "Server-side absolute path to the file to ingest. The file must be readable by the Solo process.",
                    },
                },
                "required": ["path"],
            })),
        ),
        Tool::new(
            "memory_search_docs",
            "Search across the user's ingested documents by topic or \
             phrase. Returns up to `limit` matching chunks, best \
             match first, each with the parent document's title + \
             source path so you can cite where the answer came from. \
             Use when the user asks a question that hinges on \
             material they've added as a file (\"what does my \
             runbook say about backups?\", \"find the section in the \
             notes about the new policy\"). Forgotten documents are \
             skipped.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The query text.",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results (default 5).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                },
                "required": ["query"],
            })),
        ),
        Tool::new(
            "memory_inspect_document",
            "Show one document's metadata plus a preview of every \
             chunk it was split into. Use after memory_search_docs \
             when the user wants the bigger picture for one hit \
             (\"show me the whole document this came from\"), or \
             after memory_list_documents to drill into one entry. \
             Each chunk preview is truncated to 200 chars.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "doc_id": {
                        "type": "string",
                        "description": "Document id to inspect (UUID v7).",
                    },
                },
                "required": ["doc_id"],
            })),
        ),
        Tool::new(
            "memory_list_documents",
            "List the user's ingested documents, newest first. Use \
             when the user asks \"what documents have I added?\" or \
             \"show me my files\". Returns a paginated index — pass \
             `offset` to page further back. Forgotten documents are \
             hidden by default; set `include_forgotten=true` to see \
             them too.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results per page (default 20).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                    "offset": {
                        "type": "integer",
                        "description": "Number of rows to skip (for paging). Default 0.",
                        "minimum": 0,
                    },
                    "include_forgotten": {
                        "type": "boolean",
                        "description": "If true, also include documents the user has forgotten. Default false.",
                    },
                },
            })),
        ),
        Tool::new(
            "memory_forget_document",
            "Drop one document from the user's library by id. Use \
             when the user asks you to forget a specific file \
             (\"forget my old runbook\"). The document's chunks stop \
             appearing in memory_search_docs and the vectors are \
             tombstoned in the index. The chunk rows themselves are \
             kept for forensic value (a future restore command can \
             undo this).",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "doc_id": {
                        "type": "string",
                        "description": "Document id to forget (UUID v7).",
                    },
                },
                "required": ["doc_id"],
            })),
        ),
    ]
}

/// Unwraps a `serde_json::Value` that is expected to be a JSON object and
/// returns its underlying key/value map.
///
/// # Panics
///
/// Panics when `value` is anything other than `Value::Object`.
fn json_schema_object(value: serde_json::Value) -> serde_json::Map<String, serde_json::Value> {
    let serde_json::Value::Object(map) = value else {
        panic!("json_schema_object: input must be an object");
    };
    map
}

/// Returns the name of every tool this server exposes, in registration
/// order.
///
/// Intended for cross-crate consumers (notably `solo doctor
/// --check-mcp-compat`) that only need the names and want to avoid
/// building full `rmcp::Tool` records, which allocate JSON schemas.
/// The order is meant to mirror `build_tools()`; per the original note,
/// drift between the two lists is expected to be caught by the
/// cross-provider regex test that iterates `build_tools()`.
pub fn tool_names() -> Vec<&'static str> {
    const NAMES: &[&str] = &[
        // Episode tools.
        "memory_remember",
        // v0.9.2 — batched-remember for agentic clients (solo-jarvis).
        "memory_remember_batch",
        "memory_recall",
        "memory_context",
        "memory_forget",
        "memory_inspect",
        "memory_update",
        // Derived-layer tools.
        "memory_themes",
        "memory_facts_about",
        "memory_entities",
        "memory_contradictions",
        "memory_contradiction_resolve",
        "memory_inspect_cluster",
        // Document tools added in v0.7.0:
        "memory_ingest_document",
        "memory_search_docs",
        "memory_inspect_document",
        "memory_list_documents",
        "memory_forget_document",
    ];
    NAMES.to_vec()
}

// ---------------------------------------------------------------------------
// Tool handlers
// ---------------------------------------------------------------------------

impl SoloMcpServer {
    /// Handler for `memory_remember`: validates the request, embeds the
    /// content, and stores a single episode through the tenant's write
    /// handle.
    ///
    /// Trailing whitespace is stripped from `content` before the
    /// emptiness check, the embedding call, and storage. On success the
    /// reply is the text `remembered <memory id>`.
    ///
    /// # Errors
    ///
    /// - `invalid_params` when the trimmed content is empty.
    /// - Whatever `validate_salience` rejects.
    /// - Embedder / writer failures, converted via `solo_to_mcp`.
    async fn handle_remember(
        &self,
        args: RememberArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        let content = args.content.trim_end().to_owned();
        if content.is_empty() {
            let message = "memory_remember: content must not be empty".to_string();
            return Err(McpError::invalid_params(message, None));
        }
        validate_salience(args.salience)?;

        let tenant = &self.inner.tenant;
        let embedding: solo_core::Embedding = tenant
            .embedder()
            .embed(&content)
            .await
            .map_err(solo_to_mcp)?;

        let episode = Episode {
            memory_id: MemoryId::new(),
            ts_ms: chrono::Utc::now().timestamp_millis(),
            source_type: args.source_type.unwrap_or_else(|| "user_message".into()),
            source_id: args.source_id,
            content,
            encoding_context: EncodingContext::default(),
            provenance: None,
            confidence: Confidence::new(0.9).expect("0.9 is in [0.0, 1.0]"),
            strength: 0.5,
            // v0.9.2: a caller-supplied salience wins over the 0.5
            // default; `validate_salience` has already run on it above.
            salience: args.salience.unwrap_or(0.5),
            tier: Tier::Hot,
        };

        let stored_id = tenant
            .write()
            .remember_as(self.inner.audit_principal.clone(), episode, embedding)
            .await
            .map_err(solo_to_mcp)?;
        let reply = format!("remembered {stored_id}");
        Ok(CallToolResult::success(vec![Content::text(reply)]))
    }

    /// v0.9.2 — handler for `memory_remember_batch`.
    ///
    /// Stages (the single-item analogue is `handle_remember`):
    ///   1. Validate the batch: non-empty, at most
    ///      `MAX_REMEMBER_BATCH_SIZE` items, every item's content
    ///      non-empty after trimming trailing whitespace, every item's
    ///      salience accepted by `validate_salience`.
    ///   2. Embed the items one at a time via the tenant's embedder.
    ///      This is deliberately serial rather than `join_all`: the
    ///      in-process embedder paths today (stub, local-Anthropic,
    ///      OpenAI) are individually fast and serial is robust against
    ///      rate-limit surprises (dev-log 0120 §8 R2 mitigation).
    ///      Parallel fan-out is a v0.9.3 optimization once the batch
    ///      tool has live traffic.
    ///   3. Pair each embedding with an `Episode` that uses the same
    ///      default confidence / strength / tier as single-remember.
    ///   4. Dispatch through `remember_batch_as`, which wraps every
    ///      INSERT in ONE `BEGIN IMMEDIATE` tx (ADR-0003 invariant
    ///      preserved).
    ///   5. Reply with the memory ids, in input order, as a JSON array
    ///      of strings.
    ///
    /// When the client supplied a progress reporter and the batch is
    /// larger than `MCP_REMEMBER_BATCH_PROGRESS_ITEM_THRESHOLD`,
    /// progress events are emitted during embedding, after embedding,
    /// and after the insert.
    async fn handle_remember_batch(
        &self,
        args: RememberBatchArgs,
        progress: Option<crate::mcp_progress::ProgressReporter>,
    ) -> std::result::Result<CallToolResult, McpError> {
        // Stage 1 — batch-shape validation. The writer-actor re-checks
        // `MAX_REMEMBER_BATCH_SIZE` (dev-log 0120 §3 Decision F); the
        // check is mirrored here so an obviously over-cap request pays
        // for neither the embedder calls nor the writer round-trip.
        let item_count = args.items.len();
        if item_count == 0 {
            return Err(McpError::invalid_params(
                "memory_remember_batch: items must not be empty".to_string(),
                None,
            ));
        }
        if item_count > solo_storage::MAX_REMEMBER_BATCH_SIZE {
            let message = format!(
                "memory_remember_batch: {} items exceeds MAX_REMEMBER_BATCH_SIZE = {}",
                item_count,
                solo_storage::MAX_REMEMBER_BATCH_SIZE,
            );
            return Err(McpError::invalid_params(message, None));
        }
        for (i, item) in args.items.iter().enumerate() {
            if item.content.trim_end().is_empty() {
                return Err(McpError::invalid_params(
                    format!("memory_remember_batch: items[{i}].content must not be empty"),
                    None,
                ));
            }
            if let Err(e) = validate_salience(item.salience) {
                // Prefix the validator's message with the item index so
                // the caller can tell which entry was rejected.
                return Err(McpError::invalid_params(
                    format!("memory_remember_batch: items[{i}].{}", e.message),
                    None,
                ));
            }
        }

        // v0.11.0 P3: progress is gated on batch size. At or below the
        // item threshold the wire overhead of notifications outweighs
        // the UX benefit, so the reporter is masked to `None` and every
        // `report_if_some` call below is handed no reporter.
        // NOTE(review): the "embedded" and "inserted" events both
        // report `total` as the progress value — confirm that repeating
        // a value is acceptable to the clients consuming these events.
        let total = item_count as u64;
        let progress_reporter = progress.as_ref().filter(|_| {
            item_count > crate::mcp_progress::MCP_REMEMBER_BATCH_PROGRESS_ITEM_THRESHOLD
        });

        // Stages 2 + 3 — embed serially and pair each embedding with
        // its episode. One timestamp is shared by the whole batch.
        let embedder = self.inner.tenant.embedder();
        let now_ms = chrono::Utc::now().timestamp_millis();
        let emit_every = crate::mcp_progress::MCP_REMEMBER_BATCH_PROGRESS_EMIT_EVERY;
        let mut pairs: Vec<(Episode, solo_core::Embedding)> = Vec::with_capacity(item_count);
        for (done, item) in (1usize..).zip(args.items) {
            let content = item.content.trim_end().to_owned();
            let embedding = embedder.embed(&content).await.map_err(solo_to_mcp)?;
            let episode = Episode {
                memory_id: MemoryId::new(),
                ts_ms: now_ms,
                source_type: item.source_type.unwrap_or_else(|| "user_message".into()),
                source_id: item.source_id,
                content,
                encoding_context: EncodingContext::default(),
                provenance: None,
                confidence: Confidence::new(0.9).expect("0.9 is in [0.0, 1.0]"),
                strength: 0.5,
                salience: item.salience.unwrap_or(0.5),
                tier: Tier::Hot,
            };
            pairs.push((episode, embedding));

            // v0.11.0 P3 checkpoint A — embed progress on every
            // `emit_every`-th item; the terminal "embedded" event below
            // covers any remainder.
            if done % emit_every == 0 {
                crate::mcp_progress::report_if_some(
                    progress_reporter,
                    done as u64,
                    Some(total),
                    Some("embedding"),
                );
            }
        }

        // v0.11.0 P3 checkpoint B — everything is embedded and about to
        // be handed to the writer-actor. Reported unconditionally
        // (subject to the gating above) so a batch whose size is not a
        // multiple of `emit_every` still gets a final embed-phase event.
        crate::mcp_progress::report_if_some(
            progress_reporter,
            total,
            Some(total),
            Some("embedded"),
        );

        // Stage 4 — dispatch into the writer-actor; the batch lands as
        // one tx.
        let memory_ids = self
            .inner
            .tenant
            .write()
            .remember_batch_as(self.inner.audit_principal.clone(), pairs)
            .await
            .map_err(solo_to_mcp)?;

        // v0.11.0 P3 checkpoint C — writer-actor committed. The same
        // information arrives in the POST response body, but a client
        // subscribed to the GET stream gets early confirmation here
        // (network buffering can stall the POST response marginally;
        // the SSE event is immediate).
        crate::mcp_progress::report_if_some(
            progress_reporter,
            total,
            Some(total),
            Some("inserted"),
        );

        // Stage 5 — reply: JSON array of memory ids in input order,
        // stringified so MCP clients see UUID strings (single
        // `memory_remember` also speaks strings on the wire).
        let id_strings: Vec<String> = memory_ids.iter().map(|id| id.to_string()).collect();
        let body = serde_json::to_string(&id_strings)
            .map_err(|e| McpError::internal_error(format!("serialize batch reply: {e}"), None))?;
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    /// Handler for `memory_recall`: runs the recall pipeline and formats
    /// the hits for the MCP wire.
    ///
    /// The first content item is always a JSON array of hits (possibly
    /// empty) so clients can `JSON.parse` it uniformly. When there are
    /// no hits, a second text item carrying the index size is appended
    /// as a diagnostic.
    async fn handle_recall(
        &self,
        args: RecallArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        // The pipeline lives in solo-query; this transport only formats
        // the result. `run_recall` validates empty queries itself
        // (InvalidInput → invalid_params via `solo_to_mcp`).
        let recall = solo_query::run_recall(
            self.inner.tenant.as_ref(),
            self.inner.audit_principal.clone(),
            &args.query,
            args.limit,
        )
        .await
        .map_err(solo_to_mcp)?;

        // An earlier reply shape returned a plain-English string ("no
        // matches (index has N vectors)") on empty results, which broke
        // clients parsing the reply as JSON. The payload is therefore
        // always JSON; if serialisation fails it falls back to `[]`.
        let hits_json = match serde_json::to_string_pretty(&recall.hits) {
            Ok(json) => json,
            Err(_) => "[]".to_string(),
        };

        // The `index_len` diagnostic rides along as a separate text
        // item: agents see both, tooling parses only the first.
        let diagnostic = recall
            .hits
            .is_empty()
            .then(|| Content::text(format!("(index has {} vectors)", recall.index_len)));
        let contents: Vec<_> = std::iter::once(Content::text(hits_json))
            .chain(diagnostic)
            .collect();
        Ok(CallToolResult::success(contents))
    }

    async fn handle_memory_context(
        &self,
        args: MemoryContextArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        let result = solo_query::memory_context(
            self.inner.tenant.as_ref(),
            self.inner.audit_principal.clone(),
            &args.query,
            args.subject.as_deref(),
            &self.inner.user_aliases,
            args.window_days,
            args.limit,
        )
        .await
        .map_err(solo_to_mcp)?;
        let body = serde_json::to_string_pretty(&result).unwrap_or_else(|_| String::new());
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    /// Handler for `memory_forget`: parses the id and asks the tenant's
    /// write handle to forget that episode, passing the optional reason
    /// through unchanged.
    ///
    /// On success the reply is the text `forgotten <memory id>`.
    ///
    /// # Errors
    ///
    /// - `invalid_params` when `memory_id` does not parse.
    /// - Writer failures, converted via `solo_to_mcp`.
    async fn handle_forget(
        &self,
        args: ForgetArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        let mid = match MemoryId::from_str(&args.memory_id) {
            Ok(parsed) => parsed,
            Err(e) => {
                return Err(McpError::invalid_params(
                    format!("invalid memory_id: {e}"),
                    None,
                ));
            }
        };

        let principal = self.inner.audit_principal.clone();
        self.inner
            .tenant
            .write()
            .forget_as(principal, mid, args.reason)
            .await
            .map_err(solo_to_mcp)?;

        let reply = format!("forgotten {mid}");
        Ok(CallToolResult::success(vec![Content::text(reply)]))
    }

    /// Handler for `memory_inspect`: looks up one episode by id and
    /// returns the record as pretty-printed JSON.
    ///
    /// # Errors
    ///
    /// - `invalid_params` when `memory_id` does not parse.
    /// - Lookup failures, converted via `solo_to_mcp`.
    /// - `internal_error` when the record cannot be serialised. This
    ///   used to be swallowed into an empty-string success body, which
    ///   is not valid JSON and hid the failure from the client.
    async fn handle_inspect(
        &self,
        args: InspectArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        let mid = MemoryId::from_str(&args.memory_id)
            .map_err(|e| McpError::invalid_params(format!("invalid memory_id: {e}"), None))?;
        // Pipeline lives in solo-query::inspect; transports just format.
        let row = solo_query::inspect_one(
            self.inner.tenant.read(),
            self.inner.tenant.audit(),
            self.inner.audit_principal.clone(),
            mid,
        )
        .await
        .map_err(solo_to_mcp)?;
        // Same error shape as `memory_remember_batch`'s reply path.
        let body = serde_json::to_string_pretty(&row)
            .map_err(|e| McpError::internal_error(format!("serialize inspect reply: {e}"), None))?;
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    /// Handler for `memory_update`: validates the request, delegates to
    /// `solo_query::memory_update`, and returns its result as
    /// pretty-printed JSON.
    ///
    /// Only the emptiness check trims `content`; the pipeline receives
    /// the content exactly as the client sent it.
    ///
    /// # Errors
    ///
    /// - `invalid_params` when `memory_id` does not parse or `content`
    ///   is empty / whitespace-only.
    /// - Pipeline failures, converted via `solo_to_mcp`.
    /// - `internal_error` when the result cannot be serialised. This
    ///   used to be swallowed into an empty-string success body, which
    ///   is not valid JSON and hid the failure from the client.
    async fn handle_update(
        &self,
        args: UpdateArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        let mid = MemoryId::from_str(&args.memory_id)
            .map_err(|e| McpError::invalid_params(format!("invalid memory_id: {e}"), None))?;
        if args.content.trim().is_empty() {
            return Err(McpError::invalid_params(
                "memory_update: content must not be empty".to_string(),
                None,
            ));
        }
        let result = solo_query::memory_update(
            self.inner.tenant.as_ref(),
            self.inner.audit_principal.clone(),
            mid,
            &args.content,
        )
        .await
        .map_err(solo_to_mcp)?;
        // Same error shape as `memory_remember_batch`'s reply path.
        let body = serde_json::to_string_pretty(&result)
            .map_err(|e| McpError::internal_error(format!("serialize update reply: {e}"), None))?;
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    // Path 1 derived-layer handlers (v0.4.0+). Each one delegates to a
    // single solo-query::derived pipeline and serialises the result Vec
    // to pretty JSON for the MCP wire. Empty result → JSON empty array
    // `[]` (not a special-case "no matches" string) so MCP clients can
    // parse uniformly.

    async fn handle_themes(
        &self,
        args: ThemesArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        let hits = solo_query::themes(
            self.inner.tenant.read(),
            self.inner.tenant.audit(),
            self.inner.audit_principal.clone(),
            args.window_days,
            args.limit,
        )
        .await
        .map_err(solo_to_mcp)?;
        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    /// Handler for `memory_facts_about`: validates the subject,
    /// delegates to `solo_query::facts_about`, and returns the hits as
    /// a pretty-printed JSON array.
    ///
    /// # Errors
    ///
    /// - `invalid_params` when `subject` is empty / whitespace-only.
    /// - Pipeline failures, converted via `solo_to_mcp`.
    /// - `internal_error` when the hits cannot be serialised. This used
    ///   to be swallowed into an empty-string success body, which is
    ///   not valid JSON and hid the failure from the client.
    async fn handle_facts_about(
        &self,
        args: FactsAboutArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        if args.subject.trim().is_empty() {
            return Err(McpError::invalid_params(
                "memory_facts_about: subject must not be empty".to_string(),
                None,
            ));
        }
        let hits = solo_query::facts_about(
            self.inner.tenant.read(),
            self.inner.tenant.audit(),
            self.inner.audit_principal.clone(),
            &args.subject,
            &self.inner.user_aliases,
            args.include_as_object,
            args.predicate.as_deref(),
            args.since_ms,
            args.until_ms,
            args.limit,
        )
        .await
        .map_err(solo_to_mcp)?;
        // Same error shape as `memory_remember_batch`'s reply path.
        let body = serde_json::to_string_pretty(&hits).map_err(|e| {
            McpError::internal_error(format!("serialize facts_about reply: {e}"), None)
        })?;
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    /// Handler for `memory_entities`: validates the query, delegates to
    /// `solo_query::entities`, and returns the hits as a pretty-printed
    /// JSON array.
    ///
    /// # Errors
    ///
    /// - `invalid_params` when `query` is empty / whitespace-only.
    /// - Pipeline failures, converted via `solo_to_mcp`.
    /// - `internal_error` when the hits cannot be serialised. This used
    ///   to be swallowed into an empty-string success body, which is
    ///   not valid JSON and hid the failure from the client.
    async fn handle_entities(
        &self,
        args: EntitiesArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        if args.query.trim().is_empty() {
            return Err(McpError::invalid_params(
                "memory_entities: query must not be empty".to_string(),
                None,
            ));
        }
        let hits = solo_query::entities(
            self.inner.tenant.read(),
            self.inner.tenant.audit(),
            self.inner.audit_principal.clone(),
            &args.query,
            args.limit,
        )
        .await
        .map_err(solo_to_mcp)?;
        // Same error shape as `memory_remember_batch`'s reply path.
        let body = serde_json::to_string_pretty(&hits).map_err(|e| {
            McpError::internal_error(format!("serialize entities reply: {e}"), None)
        })?;
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    async fn handle_contradictions(
        &self,
        args: ContradictionsArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        let hits = solo_query::contradictions(
            self.inner.tenant.read(),
            self.inner.tenant.audit(),
            self.inner.audit_principal.clone(),
            args.limit,
        )
        .await
        .map_err(solo_to_mcp)?;
        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    /// Handler for `memory_contradiction_resolve`: validates the
    /// identifying fields, delegates to
    /// `solo_query::resolve_contradiction`, and returns its result as
    /// pretty-printed JSON.
    ///
    /// `status`, `resolution_note`, and `winning_triple_id` are passed
    /// through without local validation.
    ///
    /// # Errors
    ///
    /// - `invalid_params` when `a_id`, `b_id`, or `kind` is empty /
    ///   whitespace-only.
    /// - Pipeline failures, converted via `solo_to_mcp`.
    /// - `internal_error` when the result cannot be serialised. This
    ///   used to be swallowed into an empty-string success body, which
    ///   is not valid JSON and hid the failure from the client.
    async fn handle_contradiction_resolve(
        &self,
        args: ContradictionResolveArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        if args.a_id.trim().is_empty() || args.b_id.trim().is_empty() || args.kind.trim().is_empty()
        {
            return Err(McpError::invalid_params(
                "memory_contradiction_resolve: a_id, b_id, and kind must not be empty".to_string(),
                None,
            ));
        }
        // Dev-log 0152 H1: routed through the writer actor so the
        // UPDATE + audit row are atomic. The signature still takes
        // reader-pool + audit for now (deprecated; ignored by the
        // function body).
        let result = solo_query::resolve_contradiction(
            self.inner.tenant.write(),
            self.inner.tenant.read(),
            self.inner.tenant.audit(),
            self.inner.audit_principal.clone(),
            &args.a_id,
            &args.b_id,
            &args.kind,
            &args.status,
            args.resolution_note.as_deref(),
            args.winning_triple_id.as_deref(),
        )
        .await
        .map_err(solo_to_mcp)?;
        // Same error shape as `memory_remember_batch`'s reply path.
        let body = serde_json::to_string_pretty(&result).map_err(|e| {
            McpError::internal_error(format!("serialize contradiction_resolve reply: {e}"), None)
        })?;
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    /// Handler for `memory_inspect_cluster`: validates the id, delegates
    /// to `solo_query::inspect_cluster`, and returns the record as
    /// pretty-printed JSON.
    ///
    /// # Errors
    ///
    /// - `invalid_params` when `cluster_id` is empty / whitespace-only.
    /// - Pipeline failures, converted via `solo_to_mcp`.
    /// - `internal_error` when the record cannot be serialised. This
    ///   used to be swallowed into an empty-string success body, which
    ///   is not valid JSON and hid the failure from the client.
    async fn handle_inspect_cluster(
        &self,
        args: InspectClusterArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        if args.cluster_id.trim().is_empty() {
            return Err(McpError::invalid_params(
                "memory_inspect_cluster: cluster_id must not be empty".to_string(),
                None,
            ));
        }
        // `solo_to_mcp` maps `Error::NotFound` → `invalid_params` for
        // MCP (the protocol does not have a separate "not found" error
        // shape; clients see the message verbatim, which includes the
        // cluster_id).
        let record = solo_query::inspect_cluster(
            self.inner.tenant.read(),
            self.inner.tenant.audit(),
            self.inner.audit_principal.clone(),
            &args.cluster_id,
            args.full_content,
        )
        .await
        .map_err(solo_to_mcp)?;
        // Same error shape as `memory_remember_batch`'s reply path.
        let body = serde_json::to_string_pretty(&record).map_err(|e| {
            McpError::internal_error(format!("serialize inspect_cluster reply: {e}"), None)
        })?;
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    // Document handlers (v0.7.0+). Each wraps the corresponding writer
    // / query API; the MCP wire shape is plain JSON serialisation of
    // the returned report / records.

    /// Handler for `memory_ingest_document`: validates the path, hands
    /// the ingest to the tenant's write handle with the default chunk
    /// configuration, and returns the ingest report as pretty-printed
    /// JSON.
    ///
    /// When `progress` is supplied, four phase-marker events are
    /// emitted around the writer call (two before, two after).
    ///
    /// # Errors
    ///
    /// - `invalid_params` when `path` is empty / whitespace-only.
    /// - Writer failures, converted via `solo_to_mcp`.
    /// - `internal_error` when the report cannot be serialised. This
    ///   used to be swallowed into an empty-string success body, which
    ///   is not valid JSON and hid the failure from the client.
    async fn handle_ingest_document(
        &self,
        args: IngestDocumentArgs,
        progress: Option<crate::mcp_progress::ProgressReporter>,
    ) -> std::result::Result<CallToolResult, McpError> {
        if args.path.trim().is_empty() {
            return Err(McpError::invalid_params(
                "memory_ingest_document: path must not be empty".to_string(),
                None,
            ));
        }
        let path = std::path::PathBuf::from(args.path);
        // Defaults match what the daemon uses today (target 500 tokens,
        // 50-token overlap). Future: thread a per-call override through
        // the args struct if a use case appears.
        let chunk_config = solo_storage::document::ChunkConfig::default();

        // v0.11.0 P3: ingest checkpoints. The writer-actor's
        // `ingest_document_as` is one opaque command that internally
        // performs parse → chunk → embed → SQL insert; we bookend it
        // with phase-marker progress events. The 4-phase taxonomy
        // matches the MCP spec brief — `total=4`, `progress` walks 1
        // → 4 — even though phases 1 and 2 (parse, chunk) emit before
        // the writer call and 3 and 4 (embed, insert) emit after.
        // Real chunk-by-chunk progress would require redesigning the
        // writer command shape (cross-cuts ADR-0003); P3's bookend
        // pattern stays additive without touching the writer.
        const INGEST_TOTAL_PHASES: u64 = 4;
        crate::mcp_progress::report_if_some(
            progress.as_ref(),
            1,
            Some(INGEST_TOTAL_PHASES),
            Some("parsed"),
        );
        crate::mcp_progress::report_if_some(
            progress.as_ref(),
            2,
            Some(INGEST_TOTAL_PHASES),
            Some("chunked"),
        );

        let report = self
            .inner
            .tenant
            .write()
            .ingest_document_as(self.inner.audit_principal.clone(), path, chunk_config)
            .await
            .map_err(solo_to_mcp)?;

        crate::mcp_progress::report_if_some(
            progress.as_ref(),
            3,
            Some(INGEST_TOTAL_PHASES),
            Some("embedded"),
        );
        // Final event includes the real chunk count from the report;
        // the per-event `message` field carries it so clients can
        // surface "N chunks indexed" without parsing the POST reply
        // body.
        crate::mcp_progress::report_if_some(
            progress.as_ref(),
            INGEST_TOTAL_PHASES,
            Some(INGEST_TOTAL_PHASES),
            Some(&format!("inserted {} chunks", report.chunks_persisted)),
        );

        // Same error shape as `memory_remember_batch`'s reply path.
        let body = serde_json::to_string_pretty(&report).map_err(|e| {
            McpError::internal_error(format!("serialize ingest_document reply: {e}"), None)
        })?;
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    async fn handle_search_docs(
        &self,
        args: SearchDocsArgs,
        progress: Option<crate::mcp_progress::ProgressReporter>,
    ) -> std::result::Result<CallToolResult, McpError> {
        // v0.11.0 P3: progress emission for search is gated on `top_k`
        // (passed via `args.limit`) — below 100 the search completes
        // fast enough that progress notifications add wire-overhead
        // with no UX benefit (Decision C). Above threshold + client
        // opted in, emit 3 phase-marker events around the query call.
        let top_k = args.limit as u32;
        let progress_active = progress.is_some()
            && top_k > crate::mcp_progress::MCP_SEARCH_DOCS_PROGRESS_TOP_K_THRESHOLD;
        let progress_reporter = if progress_active {
            progress.as_ref()
        } else {
            None
        };
        const SEARCH_TOTAL_PHASES: u64 = 3;
        crate::mcp_progress::report_if_some(
            progress_reporter,
            1,
            Some(SEARCH_TOTAL_PHASES),
            Some("hnsw_lookup"),
        );

        // `solo_query::run_doc_search` validates empty queries (returns
        // InvalidInput → invalid_params via solo_to_mcp) and clamps
        // limit upstream of the embedder call.
        let hits = solo_query::run_doc_search(
            self.inner.tenant.as_ref(),
            self.inner.audit_principal.clone(),
            &args.query,
            args.limit,
        )
        .await
        .map_err(solo_to_mcp)?;

        crate::mcp_progress::report_if_some(
            progress_reporter,
            2,
            Some(SEARCH_TOTAL_PHASES),
            Some("reranked"),
        );
        crate::mcp_progress::report_if_some(
            progress_reporter,
            SEARCH_TOTAL_PHASES,
            Some(SEARCH_TOTAL_PHASES),
            Some(&format!("returning {} hits", hits.len())),
        );

        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    /// Handler for `memory_inspect_document`: looks up one document by
    /// id and returns the record as pretty-printed JSON.
    ///
    /// # Errors
    ///
    /// - `invalid_params` when `doc_id` does not parse, or when the
    ///   lookup finds no such document.
    /// - Lookup failures, converted via `solo_to_mcp`.
    /// - `internal_error` when the record cannot be serialised. This
    ///   used to be swallowed into an empty-string success body, which
    ///   is not valid JSON and hid the failure from the client.
    async fn handle_inspect_document(
        &self,
        args: InspectDocumentArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        let doc_id = DocumentId::from_str(&args.doc_id)
            .map_err(|e| McpError::invalid_params(format!("invalid doc_id: {e}"), None))?;
        let result_opt = solo_query::inspect_document(
            self.inner.tenant.read(),
            self.inner.tenant.audit(),
            self.inner.audit_principal.clone(),
            &doc_id,
        )
        .await
        .map_err(solo_to_mcp)?;
        // A missing document is reported as `invalid_params`, with the
        // id echoed back in the message.
        let Some(record) = result_opt else {
            return Err(McpError::invalid_params(
                format!("document {doc_id} not found"),
                None,
            ));
        };
        // Same error shape as `memory_remember_batch`'s reply path.
        let body = serde_json::to_string_pretty(&record).map_err(|e| {
            McpError::internal_error(format!("serialize inspect_document reply: {e}"), None)
        })?;
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    async fn handle_list_documents(
        &self,
        args: ListDocumentsArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        let rows = solo_query::list_documents(
            self.inner.tenant.read(),
            self.inner.tenant.audit(),
            self.inner.audit_principal.clone(),
            args.limit,
            args.offset,
            args.include_forgotten,
        )
        .await
        .map_err(solo_to_mcp)?;
        let body = serde_json::to_string_pretty(&rows).unwrap_or_else(|_| String::new());
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }

    /// Handler for `memory_forget_document`: parses the id, asks the
    /// tenant's write handle to forget that document, and returns the
    /// writer's report as pretty-printed JSON.
    ///
    /// # Errors
    ///
    /// - `invalid_params` when `doc_id` does not parse.
    /// - Writer failures, converted via `solo_to_mcp`.
    /// - `internal_error` when the report cannot be serialised. This
    ///   used to be swallowed into an empty-string success body, which
    ///   is not valid JSON and hid the failure from the client.
    async fn handle_forget_document(
        &self,
        args: ForgetDocumentArgs,
    ) -> std::result::Result<CallToolResult, McpError> {
        let doc_id = DocumentId::from_str(&args.doc_id)
            .map_err(|e| McpError::invalid_params(format!("invalid doc_id: {e}"), None))?;
        let report = self
            .inner
            .tenant
            .write()
            .forget_document_as(self.inner.audit_principal.clone(), doc_id)
            .await
            .map_err(solo_to_mcp)?;
        // Same error shape as `memory_remember_batch`'s reply path.
        let body = serde_json::to_string_pretty(&report).map_err(|e| {
            McpError::internal_error(format!("serialize forget_document reply: {e}"), None)
        })?;
        Ok(CallToolResult::success(vec![Content::text(body)]))
    }
}

#[cfg(test)]
mod dispatch_tests {
    //! In-process integration tests for the MCP tool surface. We invoke
    //! `SoloMcpServer::dispatch_tool` directly (bypasses the rmcp
    //! protocol framing + `RequestContext`, which requires a `Peer`
    //! that's not constructible outside rmcp internals). The server is
    //! constructed against a real WriterActor + ReaderPool +
    //! StubEmbedder + StubVectorIndex from `solo_storage::test_support`.
    //!
    //! Tests live inline in this module rather than `tests/` because an
    //! external integration-test exe in `target/debug/deps/mcp_dispatch-*`
    //! tripped Windows UAC ERROR_ELEVATION_REQUIRED on the dev machine.
    //! The lib test binary doesn't have that issue.
    use super::*;
    use serde_json::json;
    use solo_core::VectorIndex;
    use solo_storage::test_support::StubVectorIndex;
    use solo_storage::{
        EmbedderConfig, IdentityConfig, KeyMaterial, ReaderPool, SoloConfig, StubEmbedder,
        TenantHandle, TenantRegistry, WriterActor, WriterSpawn,
    };
    use std::sync::Arc as StdArc;

    /// Builds the `SoloConfig` used by the test harness: a stub
    /// embedder of dimension `dim`, an all-zero salt, no auth and no
    /// LLM, and defaults for every other section.
    fn fake_config(dim: u32) -> SoloConfig {
        let stub_embedder = EmbedderConfig {
            name: String::from("stub"),
            version: String::from("v1"),
            dim,
            dtype: String::from("f32"),
        };
        SoloConfig {
            schema_version: 1,
            salt_hex: String::from("00000000000000000000000000000000"),
            embedder: stub_embedder,
            identity: IdentityConfig::default(),
            documents: solo_storage::DocumentConfig::default(),
            auth: None,
            audit: solo_storage::AuditSettings::default(),
            redaction: solo_storage::RedactionConfig::default(),
            llm: None,
            triples: solo_storage::TriplesConfig::default(),
            sampling: solo_storage::SamplingConfig::default(),
            steward: solo_storage::StewardSettings::default(),
        }
    }

    /// Everything one dispatch test needs: the server under test plus
    /// the resources that have to be torn down in a controlled order by
    /// `Harness::shutdown`.
    struct Harness {
        /// MCP server under test.
        server: SoloMcpServer,
        /// Temp directory that contains `test.db`; held so it is only
        /// released when the harness is shut down.
        _tmp: tempfile::TempDir,
        /// Path of the `test.db` file inside `_tmp`.
        db_path: std::path::PathBuf,
        /// The harness's own copy of the writer handle (a clone of it is
        /// given to the tenant handle); taken and dropped during
        /// shutdown.
        write_handle_extra: Option<solo_storage::WriteHandle>,
        /// Join handle of the writer thread returned by
        /// `WriterActor::spawn_full`; taken and joined during shutdown.
        join: Option<std::thread::JoinHandle<()>>,
    }

    impl Harness {
        /// Builds a complete in-process server for one test: a temp
        /// directory with `test.db`, a stub embedder and stub vector
        /// index (dimension 16), a spawned writer actor, a reader pool,
        /// and a single-tenant registry, all wired into a
        /// `SoloMcpServer`.
        ///
        /// `runtime` is used to construct the reader pool inside
        /// `block_on`.
        fn new(runtime: &tokio::runtime::Runtime) -> Self {
            use solo_storage::embedder_registry::{EmbedderIdentity, get_or_insert_embedder_id};

            let tmp = tempfile::TempDir::new().unwrap();
            let dim = 16usize;
            let hnsw: StdArc<dyn VectorIndex + Send + Sync> =
                StdArc::new(StubVectorIndex::new(dim));
            let embedder: StdArc<dyn solo_core::Embedder> =
                StdArc::new(StubEmbedder::new("stub", "v1", dim));

            // Open the database and register the stub embedder on that
            // connection before handing the connection to the writer.
            let conn = solo_storage::test_support::open_test_db_at(&tmp.path().join("test.db"));
            let embedder_id = get_or_insert_embedder_id(
                &conn,
                &EmbedderIdentity {
                    name: "stub".into(),
                    version: "v1".into(),
                    dim: dim as u32,
                    dtype: "f32".into(),
                },
            )
            .expect("register stub embedder");
            // The writer takes ownership of `conn`; `join` is the
            // writer thread's handle and is kept for `shutdown`.
            let WriterSpawn { handle, join } =
                WriterActor::spawn_full(conn, hnsw.clone(), tmp.path().to_path_buf(), embedder_id);

            // ReaderPool's deadpool::Pool needs a live tokio runtime for
            // both build + drop; build inside block_on.
            let path = tmp.path().join("test.db");
            let pool: ReaderPool =
                runtime.block_on(async { ReaderPool::new(&path, None, hnsw.clone()).unwrap() });

            let tenant_id = solo_core::TenantId::default_tenant();
            let tenant_handle = StdArc::new(TenantHandle::from_parts_for_tests(
                tenant_id.clone(),
                fake_config(dim as u32),
                path.clone(),
                tmp.path().to_path_buf(),
                embedder_id,
                hnsw,
                embedder.clone(),
                handle.clone(),
                // A no-op thread stands in for the writer join handle
                // here; the real `join` stays with the harness.
                std::thread::spawn(|| {}),
                pool,
            ));
            let key = KeyMaterial::from_bytes_for_tests([0u8; 32]);
            let registry = StdArc::new(TenantRegistry::for_tests_with_single_tenant(
                tmp.path().to_path_buf(),
                key,
                embedder,
                tenant_handle.clone(),
            ));
            // The third argument is an empty list — NOTE(review):
            // presumably the user aliases; confirm against
            // `new_for_tenant`.
            let server = SoloMcpServer::new_for_tenant(registry, tenant_handle, Vec::new());
            Harness {
                server,
                _tmp: tmp,
                db_path: path,
                write_handle_extra: Some(handle),
                join: Some(join),
            }
        }

        /// Open an additional connection to the harness database file, for
        /// seeding rows directly from a test.
        fn open_db(&self) -> rusqlite::Connection {
            let db_file = &self.db_path;
            solo_storage::test_support::open_test_db_at(db_file)
        }

        /// Tear the harness down and wait for the writer thread to exit.
        ///
        /// Consumes the harness. Inside `runtime.block_on` it drops the extra
        /// write handle, the server and the temp dir, then waits up to five
        /// seconds for the writer thread's join result.
        ///
        /// # Panics
        ///
        /// Panics if the blocking wait task fails, if the writer thread has
        /// not exited within 5s, or if the writer thread itself panicked.
        fn shutdown(mut self, runtime: &tokio::runtime::Runtime) {
            // The whole shutdown runs inside block_on so deadpool-sqlite's
            // drop (which schedules cleanup on the active runtime) sees a
            // live reactor. Without this, dropping the SoloMcpServer
            // (which holds the ReaderPool through its Arc<Inner>) panics
            // with "no reactor running".
            let join = self.join.take();
            let extra = self.write_handle_extra.take();
            runtime.block_on(async move {
                drop(extra);
                drop(self.server);
                drop(self._tmp);
                if let Some(join) = join {
                    // `JoinHandle::join` has no timeout, so a helper thread
                    // performs the join and reports over a channel; the
                    // bounded `recv_timeout` then runs on the blocking pool.
                    let (tx, rx) = std::sync::mpsc::channel();
                    std::thread::spawn(move || {
                        let _ = tx.send(join.join());
                    });
                    tokio::task::spawn_blocking(move || {
                        rx.recv_timeout(std::time::Duration::from_secs(5))
                    })
                    .await
                    .expect("blocking task")
                    .expect("writer thread did not exit within 5s")
                    .expect("writer thread panicked");
                }
            });
        }
    }

    /// Build the multi-threaded tokio runtime (two workers, all drivers
    /// enabled) that each test drives its harness with.
    fn rt() -> tokio::runtime::Runtime {
        let mut builder = tokio::runtime::Builder::new_multi_thread();
        builder.worker_threads(2).enable_all();
        builder.build().unwrap()
    }

    /// Return the text body of the first content item in a `CallToolResult`.
    ///
    /// The item is serialised to JSON and its `"text"` field is read back,
    /// which avoids pattern-matching on `RawContent`. When there is no string
    /// `"text"` field, the whole serialised item is returned instead.
    ///
    /// Panics if the result has no content items or the item fails to
    /// serialise.
    fn first_text(r: &rmcp::model::CallToolResult) -> String {
        let item = r.content.first().expect("at least one content item");
        let encoded = serde_json::to_value(item).expect("content serialises");
        match encoded.get("text").and_then(serde_json::Value::as_str) {
            Some(body) => body.to_owned(),
            None => encoded.to_string(),
        }
    }

    /// Insert one active, hot-tier episode row with the given content and
    /// return its freshly generated `MemoryId` together with the SQLite rowid.
    fn seed_episode(conn: &rusqlite::Connection, content: &str) -> (MemoryId, i64) {
        let sql = "INSERT INTO episodes
                (memory_id, ts_ms, source_type, content, confidence, strength,
                 salience, tier, status, created_at_ms, updated_at_ms)
             VALUES (?1, 0, 'test', ?2, 0.9, 0.5, 0.5, 'hot', 'active', 0, 0)";
        let memory_id = MemoryId::new();
        let inserted = conn.execute(sql, rusqlite::params![memory_id.to_string(), content]);
        inserted.expect("seed episode");
        let rowid = conn.last_insert_rowid();
        (memory_id, rowid)
    }

    /// Insert one active literal-object triple row, optionally linked to a
    /// source episode by rowid.
    fn seed_triple_row(
        conn: &rusqlite::Connection,
        triple_id: &str,
        subject: &str,
        predicate: &str,
        object: &str,
        source_episode_rowid: Option<i64>,
    ) {
        let sql = "INSERT INTO triples
                 (triple_id, subject_id, predicate, object_id, object_kind,
                  valid_from_ms, valid_to_ms, confidence, provenance_json,
                  status, created_at_ms, updated_at_ms, source_episode_id)
                 VALUES (?1, ?2, ?3, ?4, 'literal', 0, NULL, 0.9, '{}',
                         'active', 0, 0, ?5)";
        let bound = rusqlite::params![triple_id, subject, predicate, object, source_episode_rowid];
        let inserted = conn.execute(sql, bound);
        inserted.expect("seed triple");
    }

    /// Insert one unresolved contradiction row between `a_id` and `b_id` of
    /// the given kind.
    fn seed_contradiction_row(conn: &rusqlite::Connection, a_id: &str, b_id: &str, kind: &str) {
        let sql = "INSERT INTO contradictions
                 (a_memory_id, b_memory_id, kind, explanation, detected_at_ms,
                  status, resolved_at_ms, resolution_note, winning_triple_id)
                 VALUES (?1, ?2, ?3, 'test contradiction', 0,
                         'unresolved', NULL, NULL, NULL)";
        let inserted = conn.execute(sql, rusqlite::params![a_id, b_id, kind]);
        inserted.expect("seed contradiction");
    }

    /// The tool listing exposes exactly the eighteen canonical tool names, in
    /// order, and every tool carries a non-empty description.
    #[test]
    fn tools_list_returns_eighteen_canonical_tools() {
        let expected = [
            "memory_remember",
            // v0.9.2: batched remember for agentic clients.
            "memory_remember_batch",
            "memory_recall",
            "memory_context",
            "memory_forget",
            "memory_inspect",
            "memory_update",
            // Derived-layer tools (v0.4.0).
            "memory_themes",
            "memory_facts_about",
            "memory_entities",
            "memory_contradictions",
            "memory_contradiction_resolve",
            // v0.5.0 (Priority 3).
            "memory_inspect_cluster",
            // Document tools (v0.7.0).
            "memory_ingest_document",
            "memory_search_docs",
            "memory_inspect_document",
            "memory_list_documents",
            "memory_forget_document",
        ];

        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        let listed = harness.server.dispatch_list_tools();
        let names: Vec<&str> = listed.iter().map(|tool| tool.name.as_ref()).collect();
        assert_eq!(names, expected);

        for tool in &listed {
            // rmcp 1.x models the description as Option<Cow<'static, str>>.
            let description = tool.description.as_deref().unwrap_or("");
            assert!(!description.is_empty(), "{} description empty", tool.name);
            // Only checks that the schema can be rendered as JSON. The
            // per-tool `required` list is deliberately not asserted: tools
            // whose arguments are all optional (memory_themes,
            // memory_contradictions, memory_list_documents) omit it.
            let _schema = tool.schema_as_json_value();
        }
        harness.shutdown(&tokio_rt);
    }

    /// `memory_themes` with no arguments on a fresh database yields `[]`.
    #[test]
    fn themes_returns_json_array_on_empty_db() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            let reply = harness
                .server
                .dispatch_tool("memory_themes", json!({}), None)
                .await
                .expect("themes succeeds");
            let body = first_text(&reply);
            // Nothing has been derived yet, so the payload is an empty array.
            let parsed: serde_json::Value = serde_json::from_str(&body).expect("parses as json");
            assert!(parsed.is_array(), "expected array, got: {body}");
            let themes = parsed.as_array().unwrap();
            assert_eq!(themes.len(), 0);
        });
        harness.shutdown(&tokio_rt);
    }

    /// `memory_themes` accepts its optional integer arguments and still
    /// answers with a JSON array.
    #[test]
    fn themes_passes_through_window_and_limit_args() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            // Both optional integer arguments supplied at once.
            let args = json!({ "window_days": 7, "limit": 20 });
            let reply = harness
                .server
                .dispatch_tool("memory_themes", args, None)
                .await
                .expect("themes with args succeeds");
            let body = first_text(&reply);
            let parsed: serde_json::Value = serde_json::from_str(&body).expect("parses as json");
            assert!(parsed.is_array());
        });
        harness.shutdown(&tokio_rt);
    }

    /// A whitespace-only `subject` is rejected by `memory_facts_about`.
    #[test]
    fn facts_about_rejects_empty_subject() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            let failure = harness
                .server
                .dispatch_tool("memory_facts_about", json!({ "subject": "   " }), None)
                .await
                .expect_err("empty subject must error");
            // McpError offers no tidy kind/message accessor, so inspect its
            // Debug rendering to confirm the validation path was reached.
            let s = format!("{failure:?}");
            let lowered = s.to_lowercase();
            let mentions_validation = lowered.contains("subject") || lowered.contains("invalid");
            assert!(mentions_validation, "got: {s}");
        });
        harness.shutdown(&tokio_rt);
    }

    /// Querying facts for a subject nobody has mentioned succeeds with an
    /// empty array rather than an error.
    #[test]
    fn facts_about_returns_array_for_unknown_subject() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            let args = json!({ "subject": "NobodyKnowsThisSubject" });
            let reply = harness
                .server
                .dispatch_tool("memory_facts_about", args, None)
                .await
                .expect("facts_about with unknown subject succeeds");
            let body = first_text(&reply);
            let parsed: serde_json::Value = serde_json::from_str(&body).expect("parses as json");
            let facts = parsed.as_array().unwrap();
            assert_eq!(facts.len(), 0);
        });
        harness.shutdown(&tokio_rt);
    }

    /// The optional `include_as_object` flag (v0.5.1 P8) may be supplied or
    /// omitted; either way dispatch succeeds.
    #[test]
    fn facts_about_accepts_include_as_object_arg() {
        // No triples are seeded: the point is only that the optional bool is
        // parsed (serde default permits omission) and handed on without
        // upsetting the dispatcher, so both calls return an empty array.
        // Functional coverage of object-position widening lives in the
        // query-crate tests.
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            let cases = [
                // Flag given explicitly.
                (
                    json!({ "subject": "Maya", "include_as_object": true }),
                    "dispatch with include_as_object=true succeeds",
                ),
                // Flag left out entirely: must default to false, not error.
                (
                    json!({ "subject": "Maya" }),
                    "dispatch without include_as_object succeeds (default false)",
                ),
            ];
            for (args, expectation) in cases {
                let reply = harness
                    .server
                    .dispatch_tool("memory_facts_about", args, None)
                    .await
                    .expect(expectation);
                let body = first_text(&reply);
                let parsed: serde_json::Value =
                    serde_json::from_str(&body).expect("parses as json");
                assert_eq!(parsed.as_array().unwrap().len(), 0);
            }
        });
        harness.shutdown(&tokio_rt);
    }

    /// `memory_contradictions` on a fresh database yields an empty JSON array.
    #[test]
    fn contradictions_returns_json_array_on_empty_db() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            let reply = harness
                .server
                .dispatch_tool("memory_contradictions", json!({}), None)
                .await
                .expect("contradictions succeeds");
            let body = first_text(&reply);
            let parsed: serde_json::Value = serde_json::from_str(&body).expect("parses as json");
            assert!(parsed.is_array());
            let rows = parsed.as_array().unwrap();
            assert_eq!(rows.len(), 0);
        });
        harness.shutdown(&tokio_rt);
    }

    /// After seeding an `Alice knows Bob` triple, `memory_entities` with the
    /// query `Ali` lists an entity whose id is `Alice`.
    #[test]
    fn entities_returns_matching_graph_entities() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        {
            // Scoped so the seeding connection is closed before dispatch.
            let conn = harness.open_db();
            let (_seeded_id, episode_rowid) = seed_episode(&conn, "Alice graph seed");
            seed_triple_row(
                &conn,
                "t-mcp-entity-1",
                "Alice",
                "knows",
                "Bob",
                Some(episode_rowid),
            );
        }
        tokio_rt.block_on(async {
            let reply = harness
                .server
                .dispatch_tool("memory_entities", json!({ "query": "Ali" }), None)
                .await
                .expect("entities succeeds");
            let body = first_text(&reply);
            let listing: serde_json::Value = serde_json::from_str(&body).expect("parses as json");
            let found_alice = listing
                .as_array()
                .unwrap()
                .iter()
                .any(|entity| entity["entity_id"] == "Alice");
            assert!(found_alice, "expected Alice entity, got {listing}");
        });
        harness.shutdown(&tokio_rt);
    }

    /// Resolving a seeded contradiction reports `status == "resolved"` and an
    /// integer `resolved_at_ms`.
    #[test]
    fn contradiction_resolve_updates_lifecycle() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        {
            // Two conflicting triples off one episode, plus the row linking them.
            let conn = harness.open_db();
            let (_seeded_id, episode_rowid) = seed_episode(&conn, "contradiction seed");
            seed_triple_row(&conn, "t-mcp-a", "Alice", "likes", "tea", Some(episode_rowid));
            seed_triple_row(&conn, "t-mcp-b", "Alice", "likes", "coffee", Some(episode_rowid));
            seed_contradiction_row(&conn, "t-mcp-a", "t-mcp-b", "other");
        }
        tokio_rt.block_on(async {
            let args = json!({
                "a_id": "t-mcp-a",
                "b_id": "t-mcp-b",
                "kind": "other",
                "resolution_note": "tea is current",
                "winning_triple_id": "t-mcp-a"
            });
            let reply = harness
                .server
                .dispatch_tool("memory_contradiction_resolve", args, None)
                .await
                .expect("resolve succeeds");
            let body = first_text(&reply);
            let resolved: serde_json::Value = serde_json::from_str(&body).expect("parses as json");
            assert_eq!(resolved["status"].as_str(), Some("resolved"));
            assert!(resolved["resolved_at_ms"].as_i64().is_some());
        });
        harness.shutdown(&tokio_rt);
    }

    /// Content stored with `memory_remember` comes back from `memory_recall`.
    #[test]
    fn remember_then_recall_round_trip() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        // Dispatch through `&harness.server` and never clone the server: a
        // local clone keeps an Arc<Inner> (and with it a WriteHandle) alive
        // past `shutdown`, which previously caused the 5-second writer-thread
        // timeout.
        tokio_rt.block_on(async {
            let phrase = "the cat sat on the mat";

            let stored = harness
                .server
                .dispatch_tool("memory_remember", json!({ "content": phrase }), None)
                .await
                .expect("remember succeeds");
            let text = first_text(&stored);
            assert!(text.starts_with("remembered "), "got: {text}");

            let recalled = harness
                .server
                .dispatch_tool("memory_recall", json!({ "query": phrase, "limit": 5 }), None)
                .await
                .expect("recall succeeds");
            let text = first_text(&recalled);
            assert!(text.contains(phrase), "got: {text}");
        });
        harness.shutdown(&tokio_rt);
    }

    /// `memory_update` replaces an episode's content and echoes the new
    /// content in its JSON reply.
    #[test]
    fn update_rewrites_memory_content() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            let stored = harness
                .server
                .dispatch_tool(
                    "memory_remember",
                    json!({ "content": "old mcp transport memory" }),
                    None,
                )
                .await
                .expect("remember succeeds");
            // The remember reply is "remembered <id>"; keep only the id.
            let ack = first_text(&stored);
            let memory_id = ack
                .strip_prefix("remembered ")
                .expect("remembered prefix")
                .to_string();

            let args = json!({
                "memory_id": memory_id,
                "content": "new mcp transport memory"
            });
            let reply = harness
                .server
                .dispatch_tool("memory_update", args, None)
                .await
                .expect("update succeeds");
            let body = first_text(&reply);
            let updated: serde_json::Value = serde_json::from_str(&body).expect("parses as json");
            assert_eq!(updated["content"].as_str(), Some("new mcp transport memory"));
        });
        harness.shutdown(&tokio_rt);
    }

    /// `memory_context` returns one JSON bundle that echoes the query,
    /// includes the remembered episode among its recall hits, and carries
    /// `themes`, `facts` and `contradictions` arrays.
    #[test]
    fn memory_context_returns_json_bundle() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            harness
                .server
                .dispatch_tool(
                    "memory_remember",
                    json!({ "content": "memory context round trip" }),
                    None,
                )
                .await
                .expect("remember succeeds");

            let args = json!({ "query": "memory context", "limit": 5 });
            let reply = harness
                .server
                .dispatch_tool("memory_context", args, None)
                .await
                .expect("memory_context succeeds");
            let body = first_text(&reply);
            let bundle: serde_json::Value = serde_json::from_str(&body).expect("parses as json");

            assert_eq!(bundle["query"], "memory context");
            let hits = bundle["recall"]["hits"].as_array().unwrap();
            let has_seeded_hit = hits
                .iter()
                .any(|hit| hit["content"] == "memory context round trip");
            assert!(
                has_seeded_hit,
                "context recall should include remembered content: {bundle}"
            );
            assert!(bundle["themes"].is_array());
            assert!(bundle["facts"].is_array());
            assert!(bundle["contradictions"].is_array());
        });
        harness.shutdown(&tokio_rt);
    }

    /// An episode removed with `memory_forget` no longer shows up as a
    /// content row in a later `memory_recall`.
    #[test]
    fn forget_excludes_row_from_subsequent_recall() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);

        tokio_rt.block_on(async {
            let stored = harness
                .server
                .dispatch_tool(
                    "memory_remember",
                    json!({ "content": "to be forgotten" }),
                    None,
                )
                .await
                .unwrap();
            // The remember reply is "remembered <id>"; keep only the id.
            let ack = first_text(&stored);
            let memory_id = ack.strip_prefix("remembered ").unwrap().to_string();

            let forget_args = json!({ "memory_id": memory_id, "reason": "test" });
            harness
                .server
                .dispatch_tool("memory_forget", forget_args, None)
                .await
                .expect("forget succeeds");

            let recall_args = json!({ "query": "to be forgotten", "limit": 5 });
            let recalled = harness
                .server
                .dispatch_tool("memory_recall", recall_args, None)
                .await
                .unwrap();
            let text = first_text(&recalled);
            // NOTE(review): this needle only matches output rendered as
            // `"content": "<text>"` with a space after the colon; confirm
            // that is how memory_recall formats its hits, otherwise this
            // assertion can never fail.
            let forgotten_row = r#""content": "to be forgotten""#;
            assert!(
                !text.contains(forgotten_row),
                "forgotten row should be excluded; got: {text}"
            );
        });
        harness.shutdown(&tokio_rt);
    }

    /// `memory_remember` with empty content fails with a "must not be empty"
    /// error.
    #[test]
    fn empty_remember_returns_invalid_params() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            let failure = harness
                .server
                .dispatch_tool("memory_remember", json!({ "content": "" }), None)
                .await
                .unwrap_err();
            let rendered = format!("{failure:?}");
            assert!(rendered.contains("must not be empty"));
        });
        harness.shutdown(&tokio_rt);
    }

    /// `memory_recall` with a whitespace-only query fails with a "must not be
    /// empty" error.
    #[test]
    fn empty_recall_query_returns_invalid_params() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            let failure = harness
                .server
                .dispatch_tool("memory_recall", json!({ "query": "   " }), None)
                .await
                .unwrap_err();
            let rendered = format!("{failure:?}");
            assert!(rendered.contains("must not be empty"));
        });
        harness.shutdown(&tokio_rt);
    }

    /// `memory_inspect` with a malformed id fails with an "invalid memory_id"
    /// error.
    #[test]
    fn inspect_with_invalid_id_returns_invalid_params() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            let args = json!({ "memory_id": "not-a-uuid" });
            let failure = harness
                .server
                .dispatch_tool("memory_inspect", args, None)
                .await
                .unwrap_err();
            let rendered = format!("{failure:?}");
            assert!(rendered.contains("invalid memory_id"));
        });
        harness.shutdown(&tokio_rt);
    }

    /// `memory_forget` with a well-formed but unknown id fails with a "not
    /// found" error.
    #[test]
    fn forget_unknown_id_returns_invalid_params() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            // Syntactically valid UUID that matches no episode. handle_forget
            // reports NotFound, which solo_to_mcp maps to invalid_params.
            let args = json!({ "memory_id": "00000000-0000-7000-8000-000000000000" });
            let failure = harness
                .server
                .dispatch_tool("memory_forget", args, None)
                .await
                .unwrap_err();
            let rendered = format!("{failure:?}");
            assert!(rendered.contains("not found"));
        });
        harness.shutdown(&tokio_rt);
    }

    /// Dispatching a tool name the server does not know fails with an
    /// "unknown tool" error.
    #[test]
    fn unknown_tool_name_returns_invalid_params() {
        let tokio_rt = rt();
        let harness = Harness::new(&tokio_rt);
        tokio_rt.block_on(async {
            let failure = harness
                .server
                .dispatch_tool("memory.summon", json!({}), None)
                .await
                .unwrap_err();
            let rendered = format!("{failure:?}");
            assert!(rendered.contains("unknown tool"));
        });
        harness.shutdown(&tokio_rt);
    }

    /// Regression guard for the v0.4.1 MCP tool-name fix, widened in v0.5.0
    /// Priority 4 from Anthropic alone to **all three** major LLM providers.
    ///
    /// Every provider applies its own regex to tool names on the
    /// function-calling wire, so a portable name has to satisfy each of:
    ///
    ///   - **Anthropic**: `^[a-zA-Z0-9_-]{1,64}$`. This is what v0.4.1
    ///     shipped for; a failing name gets the whole toolset rejected on
    ///     Claude Desktop / Cursor / Claude Code with
    ///     `FrontendRemoteMcpToolDefinition.name: String should match
    ///     pattern ...`.
    ///   - **OpenAI** function-calling: `^[a-zA-Z_][a-zA-Z0-9_-]*$`, at most
    ///     64 characters (first character a letter or underscore).
    ///   - **Gemini** function-calling: documented as a-z, A-Z, 0-9,
    ///     underscores and dashes, with some sources also permitting dots.
    ///     Because sources disagree, this test takes the conservative
    ///     intersection: leading letter or underscore, then alphanumerics
    ///     and underscores only (no hyphen, no dot), at most 63 characters.
    ///     That is the strictest of the three patterns, so a name passing it
    ///     passes the other two as well, and it protects against a provider
    ///     tightening its regex later (the failure v0.4.1 hit on Anthropic).
    ///     See
    ///     <https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/docs/api/google/generativeai/protos/FunctionDeclaration.md>
    ///     and <https://ai.google.dev/gemini-api/docs/function-calling>.
    ///
    /// Lesson banked v0.3 #8: rmcp's framing tests accept dot-named tools
    /// because rmcp's client-side validation is permissive; only the
    /// downstream provider API enforces a regex. Checking the names at
    /// `cargo test` time means any future rename must clear all three
    /// provider patterns before it reaches a real client.
    #[test]
    fn tool_names_match_cross_provider_regex() {
        /// Anthropic API name pattern: `^[a-zA-Z0-9_-]{1,64}$`.
        fn passes_anthropic(name: &str) -> bool {
            (1..=64).contains(&name.len())
                && name
                    .bytes()
                    .all(|b| b.is_ascii_alphanumeric() || b == b'_' || b == b'-')
        }

        /// OpenAI function-calling name pattern:
        /// `^[a-zA-Z_][a-zA-Z0-9_-]*$`, at most 64 bytes long.
        fn passes_openai(name: &str) -> bool {
            if name.len() > 64 {
                return false;
            }
            // An empty name has no first byte and is rejected here.
            match name.as_bytes().split_first() {
                Some((&head, tail)) => {
                    (head.is_ascii_alphabetic() || head == b'_')
                        && tail
                            .iter()
                            .all(|&b| b.is_ascii_alphanumeric() || b == b'_' || b == b'-')
                }
                None => false,
            }
        }

        /// Gemini function-calling name pattern, conservative reading:
        /// `^[a-zA-Z_][a-zA-Z0-9_]*$`, at most 63 bytes long. Hyphens and
        /// dots are rejected, making this the strictest of the three checks.
        fn passes_gemini(name: &str) -> bool {
            if name.len() > 63 {
                return false;
            }
            match name.as_bytes().split_first() {
                Some((&head, tail)) => {
                    (head.is_ascii_alphabetic() || head == b'_')
                        && tail.iter().all(|&b| b.is_ascii_alphanumeric() || b == b'_')
                }
                None => false,
            }
        }

        let tools = build_tools();
        assert_eq!(
            tools.len(),
            18,
            "expected 18 tools (context + update/entities/resolve + v0.5.x + document tools + remember_batch)"
        );

        // build_tools() and the public tool_names() list must not diverge.
        let built_names: Vec<String> = tools.iter().map(|tool| tool.name.to_string()).collect();
        let advertised_names: Vec<String> =
            super::tool_names().iter().map(|s| s.to_string()).collect();
        assert_eq!(
            built_names, advertised_names,
            "tool_names() drifted from build_tools() — keep them in sync"
        );

        for tool in &tools {
            let name: &str = &tool.name;
            assert!(
                passes_anthropic(name),
                "tool name {:?} fails Anthropic regex \
                 ^[a-zA-Z0-9_-]{{1,64}}$ — see v0.3 lesson #8",
                name
            );
            assert!(
                passes_openai(name),
                "tool name {:?} fails OpenAI function-calling regex \
                 ^[a-zA-Z_][a-zA-Z0-9_-]*$ (len ≤ 64)",
                name
            );
            assert!(
                passes_gemini(name),
                "tool name {:?} fails Gemini function-calling regex \
                 ^[a-zA-Z_][a-zA-Z0-9_]*$ (len ≤ 63, strict)",
                name
            );
        }
    }

    /// Regression guard (dev-log 0152 finding M3): `items.maxItems` in the
    /// `memory_remember_batch` JSON Schema has to match the runtime cap
    /// `solo_storage::MAX_REMEMBER_BATCH_SIZE`. The schema is derived from
    /// that constant today; this pin catches anyone who hard-codes a literal
    /// such as `200` again.
    #[test]
    fn remember_batch_maxitems_matches_max_batch_size() {
        let tools = build_tools();
        let batch_tool = tools
            .iter()
            .find(|tool| tool.name == "memory_remember_batch")
            .expect("memory_remember_batch tool is missing");
        let schema = serde_json::to_value(&batch_tool.input_schema)
            .expect("input_schema serialises as JSON");
        // Walk properties -> items -> maxItems in a single pointer lookup.
        let max_items = schema
            .pointer("/properties/items/maxItems")
            .and_then(serde_json::Value::as_u64)
            .expect("memory_remember_batch.items.maxItems missing or not a u64");
        let runtime_cap = solo_storage::MAX_REMEMBER_BATCH_SIZE;
        assert_eq!(
            max_items as usize,
            runtime_cap,
            "memory_remember_batch schema maxItems ({}) must equal \
             solo_storage::MAX_REMEMBER_BATCH_SIZE ({}). If the cap \
             changed, update both — but you should never need to: the \
             schema now interpolates the constant directly.",
            max_items,
            runtime_cap,
        );
    }

    /// Regression guard for the v0.5.0 Priority 4 jargon pass.
    ///
    /// A calling LLM's tool-search step usually reads only the tool
    /// descriptions and `get_info().instructions` when deciding whether
    /// Solo's tools are relevant. Earlier text relied on Solo-internal
    /// vocabulary (`SPO`, `Steward`, `LEFT JOIN`, `candidate pair`,
    /// `tagged_with`), which does not pattern-match natural-language agent
    /// queries such as "what do you know about Alex?". That was the
    /// load-bearing v0.5.0 finding from the 2026-05-14 thesis-test in Claude
    /// Desktop.
    ///
    /// The test bans the old vocabulary from all user-facing text, so a
    /// contributor who reaches for jargon fails the build and has to choose
    /// plain-English wording.
    #[test]
    fn tool_descriptions_avoid_internal_jargon() {
        // Matched as case-insensitive substrings. Taken from the
        // pre-Priority-4 descriptions; extend only when a new term creeps in.
        const FORBIDDEN: &[&str] = &[
            "SPO",
            "Steward",
            "Steward-flagged",
            "LEFT JOIN",
            "candidate pair",
            "candidate_pair",
            "tagged_with",
        ];

        /// First forbidden term, in list order, that occurs in `text`
        /// ignoring case; `None` when the text is clean.
        fn first_jargon_hit(text: &str) -> Option<&'static str> {
            let lowered = text.to_lowercase();
            FORBIDDEN
                .iter()
                .copied()
                .find(|term| lowered.contains(&term.to_lowercase()))
        }

        // 1. Every tool description.
        for tool in build_tools() {
            let description = tool.description.as_deref().unwrap_or("");
            if let Some(term) = first_jargon_hit(description) {
                panic!(
                    "tool {:?} description contains forbidden jargon \
                     {:?} — rewrite in plain English (see v0.5.0 \
                     Priority 4)",
                    tool.name, term,
                );
            }
        }

        // 2. The server-level instructions, which tool-search reads first.
        let server_info = harness_server_info();
        let instructions = server_info
            .instructions
            .as_deref()
            .expect("get_info() must set instructions");
        if let Some(term) = first_jargon_hit(instructions) {
            panic!(
                "get_info().instructions contains forbidden jargon \
                 {:?} — rewrite in plain English",
                term,
            );
        }
    }

    /// Fetch the server's `ServerInfo` for tests that only need its
    /// identity and instructions text.
    ///
    /// This builds a complete `Harness` on its own tokio runtime, calls
    /// `ServerHandler::get_info()` on the harness server, shuts the harness
    /// down, and returns the captured info. Callers therefore do not need a
    /// harness of their own, but each call does pay for a full harness
    /// set-up and tear-down.
    fn harness_server_info() -> rmcp::model::ServerInfo {
        let runtime = rt();
        let h = Harness::new(&runtime);
        // Capture the info before `shutdown` consumes the harness.
        let info = ServerHandler::get_info(&h.server);
        h.shutdown(&runtime);
        info
    }

    /// Regression guard for the MCP `serverInfo` identity bug fixed in
    /// v0.9.1 (P1 Fix 1).
    ///
    /// Background: the v0.9.0 rmcp 0.1.5 → 1.7 bump (P0a) swapped the
    /// explicit `Implementation::new("solo", "<version>")` for
    /// `Implementation::from_build_env()`. That helper picks up
    /// `CARGO_PKG_NAME` / `CARGO_PKG_VERSION` from **rmcp's own** build
    /// environment rather than the consumer's, so every v0.9.0 daemon
    /// announced itself as `{name: "rmcp", version: "1.7.0"}` instead
    /// of `{name: "solo", version: "<workspace.version>"}`.
    ///
    /// What is pinned here:
    ///   - `name == "solo"` — the operator-facing binary name, not
    ///     `"solo-api"` (which is what `env!("CARGO_PKG_NAME")` would
    ///     give for this crate's manifest);
    ///   - `version == env!("CARGO_PKG_VERSION")` as seen by solo-api's
    ///     own compile environment, i.e. the inherited
    ///     workspace.package version, keeping it aligned with
    ///     `solo --version` and `solo-cli`'s identity.
    #[test]
    fn server_info_identity_is_solo_not_rmcp_or_solo_api() {
        let info = harness_server_info();
        let identity = &info.server_info;
        let (name, version) = (identity.name.as_str(), identity.version.as_str());

        assert_eq!(
            name, "solo",
            "MCP serverInfo.name must be \"solo\" (not \"rmcp\" or \
             \"solo-api\"). got name={name:?} version={version:?}"
        );

        let expected_version = env!("CARGO_PKG_VERSION");
        assert_eq!(
            version, expected_version,
            "MCP serverInfo.version must match solo-api's compile-time \
             CARGO_PKG_VERSION (i.e. the workspace.package version); \
             a mismatch means we regressed back to rmcp's build env. \
             got version={version:?}"
        );
    }

    // ---- memory_inspect_cluster (v0.5.0 Priority 3) ----

    /// Inspecting a cluster id that does not exist must fail, and the
    /// failure must either name the id or say "not found".
    #[test]
    fn inspect_cluster_unknown_id_returns_invalid_params() {
        // The NotFound raised by solo_query::inspect_cluster goes
        // through `solo_to_mcp` and surfaces as `invalid_params`,
        // because MCP has no dedicated not-found error shape.
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let args = json!({ "cluster_id": "no-such-cluster" });
            let failure = harness
                .server
                .dispatch_tool("memory_inspect_cluster", args, None)
                .await
                .expect_err("unknown cluster must error");
            let s = format!("{failure:?}");
            let names_the_id = s.contains("no-such-cluster");
            let says_not_found = s.to_lowercase().contains("not found");
            assert!(
                names_the_id || says_not_found,
                "expected error to mention the missing cluster id; got: {s}"
            );
        });
        harness.shutdown(&test_rt);
    }

    /// A whitespace-only `cluster_id` must be refused, with an error
    /// that mentions the field or says it must not be empty.
    #[test]
    fn inspect_cluster_rejects_empty_id() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let args = json!({ "cluster_id": "   " });
            let failure = harness
                .server
                .dispatch_tool("memory_inspect_cluster", args, None)
                .await
                .expect_err("blank cluster_id must error");
            let s = format!("{failure:?}");
            // Lowercase once; both accepted phrasings are checked
            // case-insensitively.
            let lowered = s.to_lowercase();
            assert!(
                lowered.contains("cluster_id") || lowered.contains("must not be empty"),
                "got: {s}"
            );
        });
        harness.shutdown(&test_rt);
    }

    // ---- Document tools (v0.7.0 P5) ----
    //
    // The five document handlers each have two arg-shape tests:
    //   - arg-struct parses from JSON (serde round-trip; defaults work).
    //   - dispatch arm routes to the handler (we observe behaviour via
    //     a known empty-DB response — bad routing surfaces as
    //     "unknown tool" or wrong shape).
    //
    // Functional coverage (ingest → search → inspect → forget) lives in
    // `crates/solo-cli/tests/mcp_smoke.rs` where a real subprocess + real
    // writer-with-embedder is wired up. The in-process Harness here uses
    // `WriterActor::spawn` which doesn't carry an embedder, so ingest /
    // search themselves return an error — but the dispatch + arg-parse
    // paths are still exercised correctly.

    /// `IngestDocumentArgs` deserializes from a lone `path` and refuses
    /// input that omits it.
    #[test]
    fn ingest_document_args_parse_with_required_path() {
        let parsed = serde_json::from_value::<IngestDocumentArgs>(json!({ "path": "/tmp/notes.md" }))
            .expect("parses");
        assert_eq!(parsed.path, "/tmp/notes.md");

        // `path` is required, so an empty object must fail at
        // deserialization and the error should name the field.
        let rejection = serde_json::from_value::<IngestDocumentArgs>(json!({})).unwrap_err();
        assert!(rejection.to_string().contains("path"));
    }

    /// `SearchDocsArgs` falls back to `limit = 5` when the field is
    /// absent and honours an explicit value when present.
    #[test]
    fn search_docs_args_parse_with_default_limit() {
        let defaulted =
            serde_json::from_value::<SearchDocsArgs>(json!({ "query": "backups" })).expect("parses");
        assert_eq!(defaulted.query, "backups");
        assert_eq!(defaulted.limit, 5, "default limit must be 5");

        let explicit =
            serde_json::from_value::<SearchDocsArgs>(json!({ "query": "backups", "limit": 20 }))
                .expect("parses");
        assert_eq!(explicit.limit, 20);
    }

    /// `InspectDocumentArgs` deserializes from a lone `doc_id` and
    /// refuses input that omits it.
    #[test]
    fn inspect_document_args_parse_with_required_doc_id() {
        let parsed = serde_json::from_value::<InspectDocumentArgs>(json!({ "doc_id": "abc" }))
            .expect("parses");
        assert_eq!(parsed.doc_id, "abc");

        // The deserialization error for an empty object should name the
        // missing field.
        let rejection = serde_json::from_value::<InspectDocumentArgs>(json!({})).unwrap_err();
        assert!(rejection.to_string().contains("doc_id"));
    }

    /// `ListDocumentsArgs` accepts an empty object (limit 20, offset 0,
    /// forgotten documents excluded) and honours explicit overrides for
    /// all three fields.
    #[test]
    fn list_documents_args_parse_with_all_defaults() {
        let defaults = serde_json::from_value::<ListDocumentsArgs>(json!({})).expect("parses");
        assert_eq!(defaults.limit, 20, "default limit must be 20");
        assert_eq!(defaults.offset, 0, "default offset must be 0");
        assert!(
            !defaults.include_forgotten,
            "default include_forgotten must be false"
        );

        let overrides = json!({ "limit": 5, "offset": 10, "include_forgotten": true });
        let explicit = serde_json::from_value::<ListDocumentsArgs>(overrides).expect("parses");
        assert_eq!(explicit.limit, 5);
        assert_eq!(explicit.offset, 10);
        assert!(explicit.include_forgotten);
    }

    /// `ForgetDocumentArgs` deserializes from a lone `doc_id` and
    /// refuses input that omits it.
    #[test]
    fn forget_document_args_parse_with_required_doc_id() {
        let parsed = serde_json::from_value::<ForgetDocumentArgs>(json!({ "doc_id": "abc" }))
            .expect("parses");
        assert_eq!(parsed.doc_id, "abc");

        // The deserialization error for an empty object should name the
        // missing field.
        let rejection = serde_json::from_value::<ForgetDocumentArgs>(json!({})).unwrap_err();
        assert!(rejection.to_string().contains("doc_id"));
    }

    /// An empty `path` must be refused by `memory_ingest_document`.
    #[test]
    fn ingest_document_rejects_empty_path() {
        // Routing proof: the call goes dispatch arm →
        // handle_ingest_document, whose empty-path guard fires before
        // the writer is touched.
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let args = json!({ "path": "" });
            let failure = harness
                .server
                .dispatch_tool("memory_ingest_document", args, None)
                .await
                .expect_err("empty path must error");
            let s = format!("{failure:?}");
            let lowered = s.to_lowercase();
            assert!(
                lowered.contains("path") || lowered.contains("must not be empty"),
                "got: {s}"
            );
        });
        harness.shutdown(&test_rt);
    }

    /// A whitespace-only `query` must be refused by
    /// `memory_search_docs`.
    #[test]
    fn search_docs_rejects_empty_query() {
        // The blank query fails solo_query::run_doc_search's
        // validation: InvalidInput, surfaced as invalid_params.
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let args = json!({ "query": "   " });
            let failure = harness
                .server
                .dispatch_tool("memory_search_docs", args, None)
                .await
                .expect_err("empty query must error");
            let s = format!("{failure:?}");
            let lowered = s.to_lowercase();
            assert!(
                lowered.contains("must not be empty") || lowered.contains("invalid"),
                "got: {s}"
            );
        });
        harness.shutdown(&test_rt);
    }

    /// A well-formed UUID with no matching document row must produce a
    /// "not found" error from `memory_inspect_document`.
    #[test]
    fn inspect_document_unknown_id_returns_invalid_params() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            // Valid UUID shape, but nothing was ever stored under it.
            let args = json!({ "doc_id": "00000000-0000-7000-8000-000000000000" });
            let failure = harness
                .server
                .dispatch_tool("memory_inspect_document", args, None)
                .await
                .expect_err("unknown doc must error");
            let s = format!("{failure:?}");
            let says_not_found = s.to_lowercase().contains("not found");
            assert!(says_not_found, "expected 'not found' message; got: {s}");
        });
        harness.shutdown(&test_rt);
    }

    /// A `doc_id` that is not a UUID must be refused by
    /// `memory_inspect_document` with an "invalid doc_id" error.
    #[test]
    fn inspect_document_rejects_malformed_id() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let args = json!({ "doc_id": "not-a-uuid" });
            let failure = harness
                .server
                .dispatch_tool("memory_inspect_document", args, None)
                .await
                .expect_err("malformed doc_id must error");
            let s = format!("{failure:?}");
            assert!(s.contains("invalid doc_id"), "got: {s}");
        });
        harness.shutdown(&test_rt);
    }

    /// With nothing ingested, `memory_list_documents` replies with a
    /// JSON array of length zero.
    #[test]
    fn list_documents_returns_empty_array_on_empty_db() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let reply = harness
                .server
                .dispatch_tool("memory_list_documents", json!({}), None)
                .await
                .expect("list succeeds");
            let text = first_text(&reply);
            let listing: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
            assert!(listing.is_array(), "expected array, got: {text}");
            let rows = listing.as_array().unwrap();
            assert_eq!(rows.len(), 0);
        });
        harness.shutdown(&test_rt);
    }

    /// `memory_list_documents` accepts explicit `limit`, `offset` and
    /// `include_forgotten` arguments and still replies with a JSON
    /// array. Only the reply shape is asserted here.
    #[test]
    fn list_documents_passes_through_limit_offset_include_args() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let args = json!({ "limit": 5, "offset": 10, "include_forgotten": true });
            let reply = harness
                .server
                .dispatch_tool("memory_list_documents", args, None)
                .await
                .expect("list with args succeeds");
            let text = first_text(&reply);
            let listing: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
            assert!(listing.is_array());
        });
        harness.shutdown(&test_rt);
    }

    /// A `doc_id` that is not a UUID must be refused by
    /// `memory_forget_document` with an "invalid doc_id" error.
    #[test]
    fn forget_document_rejects_malformed_id() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let args = json!({ "doc_id": "not-a-uuid" });
            let failure = harness
                .server
                .dispatch_tool("memory_forget_document", args, None)
                .await
                .expect_err("malformed doc_id must error");
            let s = format!("{failure:?}");
            assert!(s.contains("invalid doc_id"), "got: {s}");
        });
        harness.shutdown(&test_rt);
    }

    // -----------------------------------------------------------------
    // v0.9.2 — `memory_remember_batch` + `salience` MCP layer tests.
    // -----------------------------------------------------------------

    /// `memory_remember` accepts an explicit in-range `salience` and
    /// answers with the usual confirmation text. The out-of-range and
    /// boundary cases are covered by the sibling tests
    /// `remember_with_out_of_range_salience_returns_invalid_params`
    /// and `remember_with_boundary_salience_succeeds`.
    #[test]
    fn remember_with_explicit_salience_round_trips() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let args = json!({ "content": "with salience", "salience": 0.83 });
            let reply = harness
                .server
                .dispatch_tool("memory_remember", args, None)
                .await
                .expect("remember w/ salience succeeds");
            let text = first_text(&reply);
            // The confirmation is "remembered " followed by the new
            // MemoryId.
            assert!(text.starts_with("remembered "), "got: {text}");
        });
        harness.shutdown(&test_rt);
    }

    /// A `salience` above 1.0 must be refused by `memory_remember` with
    /// an error that states the allowed range.
    #[test]
    fn remember_with_out_of_range_salience_returns_invalid_params() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let args = json!({ "content": "out of range", "salience": 1.5 });
            let failure = harness
                .server
                .dispatch_tool("memory_remember", args, None)
                .await
                .unwrap_err();
            let s = format!("{failure:?}");
            assert!(s.contains("salience must be"), "got: {s}");
        });
        harness.shutdown(&test_rt);
    }

    /// The salience range is inclusive: both endpoints, 0.0 and 1.0,
    /// must be accepted by `memory_remember`.
    #[test]
    fn remember_with_boundary_salience_succeeds() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let endpoints = [0.0_f64, 1.0_f64];
            for s in endpoints {
                let args = json!({ "content": format!("boundary-{s}"), "salience": s });
                let reply = harness
                    .server
                    .dispatch_tool("memory_remember", args, None)
                    .await
                    .expect("boundary salience succeeds");
                let confirmation = first_text(&reply);
                assert!(confirmation.starts_with("remembered "));
            }
        });
        harness.shutdown(&test_rt);
    }

    /// Happy-path batch: three items go in and the reply is a JSON
    /// array of three distinct, UUID-length id strings. The test checks
    /// count, shape and distinctness; it does not itself verify that
    /// the ids line up with the input order.
    #[test]
    fn remember_batch_returns_ids_in_order() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let items = json!([
                { "content": "batch-a" },
                { "content": "batch-b", "source_type": "user_preference", "salience": 0.9 },
                { "content": "batch-c", "salience": 0.1 },
            ]);
            let reply = harness
                .server
                .dispatch_tool("memory_remember_batch", json!({ "items": items }), None)
                .await
                .expect("batch succeeds");
            let text = first_text(&reply);
            let parsed: serde_json::Value = serde_json::from_str(&text).expect("reply is JSON");
            let arr = parsed.as_array().expect("reply is array");
            assert_eq!(arr.len(), 3, "3 items in → 3 ids out: {text}");

            // Validate each entry as it is collected: it must be a
            // string of UUID length (36 characters).
            let mut ids: Vec<&str> = Vec::with_capacity(arr.len());
            for v in arr {
                let Some(s) = v.as_str() else {
                    panic!("non-string id: {v}")
                };
                assert_eq!(s.len(), 36, "UUID-shaped id expected: {s}");
                ids.push(s);
            }

            // Sorting then de-duplicating leaves one entry per unique
            // id, so the length only stays at 3 if all are distinct.
            ids.sort();
            ids.dedup();
            assert_eq!(ids.len(), 3, "ids must be distinct: {text}");
        });
        harness.shutdown(&test_rt);
    }

    /// An empty `items` array must be refused by
    /// `memory_remember_batch` with a "must not be empty" error.
    #[test]
    fn remember_batch_empty_items_returns_invalid_params() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let args = json!({ "items": [] });
            let failure = harness
                .server
                .dispatch_tool("memory_remember_batch", args, None)
                .await
                .unwrap_err();
            let s = format!("{failure:?}");
            assert!(s.contains("must not be empty"), "got: {s}");
        });
        harness.shutdown(&test_rt);
    }

    /// Per-item validation: a whitespace-only `content` in the middle
    /// of a batch must fail the call, and the error must carry the
    /// offending position (`items[1]`).
    #[test]
    fn remember_batch_rejects_per_item_empty_content() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            // Index 1 is the only invalid entry.
            let items = json!([
                { "content": "ok-1" },
                { "content": "   " },
                { "content": "ok-3" },
            ]);
            let args = json!({ "items": items });
            let failure = harness
                .server
                .dispatch_tool("memory_remember_batch", args, None)
                .await
                .unwrap_err();
            let s = format!("{failure:?}");
            assert!(s.contains("items[1]"), "must mention items[1]: {s}");
            assert!(s.contains("must not be empty"), "got: {s}");
        });
        harness.shutdown(&test_rt);
    }

    /// Per-item validation: a negative `salience` on one batch entry
    /// must fail the call, and the error must carry the offending
    /// position (`items[1]`).
    #[test]
    fn remember_batch_rejects_per_item_salience_out_of_range() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            // Index 1 is the only invalid entry.
            let items = json!([
                { "content": "ok-1", "salience": 0.5 },
                { "content": "out-of-range", "salience": -0.1 },
            ]);
            let args = json!({ "items": items });
            let failure = harness
                .server
                .dispatch_tool("memory_remember_batch", args, None)
                .await
                .unwrap_err();
            let s = format!("{failure:?}");
            assert!(s.contains("items[1]"), "must mention items[1]: {s}");
            assert!(s.contains("salience must be"), "got: {s}");
        });
        harness.shutdown(&test_rt);
    }

    /// A batch one item larger than
    /// `solo_storage::MAX_REMEMBER_BATCH_SIZE` must be refused, and the
    /// error must name the cap. The size is derived from the constant
    /// itself, so the test tracks any change to the limit.
    #[test]
    fn remember_batch_over_cap_returns_invalid_params() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            // Inclusive range: indices 0..=MAX gives MAX + 1 items.
            let items: Vec<serde_json::Value> = (0..=solo_storage::MAX_REMEMBER_BATCH_SIZE)
                .map(|i| json!({ "content": format!("over-{i}") }))
                .collect();
            let args = json!({ "items": items });
            let failure = harness
                .server
                .dispatch_tool("memory_remember_batch", args, None)
                .await
                .unwrap_err();
            let s = format!("{failure:?}");
            assert!(
                s.contains("MAX_REMEMBER_BATCH_SIZE"),
                "must mention the cap: {s}"
            );
        });
        harness.shutdown(&test_rt);
    }

    // -----------------------------------------------------------------
    // v0.11.0 P3: per-tool progress event tests.
    //
    // These tests invoke `dispatch_tool` with a real
    // `ProgressReporter` wired to a fresh `SessionState`, then drain
    // the session's broadcast receiver to observe the emitted events.
    // The pattern mirrors `mcp_progress::tests::progress_reporter_*`
    // but exercises the full handler call stack (including the writer
    // and query pipelines) end-to-end.
    // -----------------------------------------------------------------

    use crate::mcp_progress::{ProgressReporter, ProgressToken};
    use crate::mcp_session::SessionState;
    use std::sync::Arc as StdArc2;

    /// Build a new shared `SessionState` for the default tenant, with
    /// `None` as the second constructor argument, for a progress test
    /// to subscribe to.
    fn fresh_progress_session() -> StdArc2<SessionState> {
        let tenant = solo_core::TenantId::default_tenant();
        let state = SessionState::new(tenant, None);
        StdArc2::new(state)
    }

    /// Collect every event currently obtainable from `rx` without
    /// waiting. Draining stops at the first `try_recv` error of any
    /// kind, and the events gathered so far are returned in receive
    /// order.
    fn drain_progress_events(
        rx: &mut tokio::sync::broadcast::Receiver<crate::mcp_session::McpStreamEvent>,
    ) -> Vec<crate::mcp_session::McpStreamEvent> {
        std::iter::from_fn(|| rx.try_recv().ok()).collect()
    }

    // v0.11.0 P3 note: `ingest_document_emits_progress_at_*` test lives
    // in `http::handler_tests` because the dispatch_tests harness uses
    // `WriterActor::spawn` (no embedder), so an end-to-end ingest panics
    // with "writer has no embedder". The handler_tests harness uses
    // `WriterActor::spawn_full` which carries an embedder; we exercise
    // the ingest progress checkpoints there.

    /// v0.11.0 P3: with a reporter wired and `limit` set to 150 (above
    /// the 100 threshold), `memory_search_docs` publishes exactly three
    /// progress events to the session.
    #[test]
    fn search_docs_emits_progress_only_when_top_k_above_100() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let session = fresh_progress_session();
            let mut rx = session.subscribe_events();
            let reporter = ProgressReporter::new(session.clone(), ProgressToken(json!(42)));
            let args = json!({ "query": "anything", "limit": 150 });
            let _r = harness
                .server
                .dispatch_tool("memory_search_docs", args, Some(reporter))
                .await
                .expect("search succeeds");
            let events = drain_progress_events(&mut rx);
            assert_eq!(
                events.len(),
                3,
                "expected 3 search progress events at top_k=150, got {}",
                events.len()
            );
            // Spec shape: each event echoes the numeric token 42,
            // reports total 3, and progress counts up 1, 2, 3.
            for (step, event) in (1u64..).zip(&events) {
                let params = &event.data["params"];
                assert_eq!(params["progressToken"], json!(42));
                assert_eq!(params["total"], json!(3));
                assert_eq!(params["progress"], json!(step));
            }
        });
        harness.shutdown(&test_rt);
    }

    /// v0.11.0 P3: threshold gating (Decision C). With `limit` at 50,
    /// i.e. `top_k <= 100`, `memory_search_docs` stays silent even
    /// though a reporter is wired.
    #[test]
    fn search_docs_emits_no_progress_when_top_k_below_threshold() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let session = fresh_progress_session();
            let mut rx = session.subscribe_events();
            let reporter = ProgressReporter::new(session.clone(), ProgressToken(json!("t")));
            let args = json!({ "query": "anything", "limit": 50 });
            let _r = harness
                .server
                .dispatch_tool("memory_search_docs", args, Some(reporter))
                .await
                .expect("search succeeds");
            let events = drain_progress_events(&mut rx);
            assert!(
                events.is_empty(),
                "expected no progress events at top_k=50, got {events:?}"
            );
        });
        harness.shutdown(&test_rt);
    }

    /// v0.11.0 P3: a `memory_remember_batch` call with more than 50
    /// items reports embed progress every 25 items, then a final
    /// "embedded" and "inserted" event. For 51 items that means four
    /// events: 25, 50, 51 (embedded) and 51 (inserted).
    #[test]
    fn remember_batch_emits_progress_only_when_size_above_50() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let session = fresh_progress_session();
            let mut rx = session.subscribe_events();
            let reporter = ProgressReporter::new(session.clone(), ProgressToken(json!("batch")));
            let items: Vec<serde_json::Value> = (0..51)
                .map(|i| json!({ "content": format!("item-{i}") }))
                .collect();
            let args = json!({ "items": items });
            let _r = harness
                .server
                .dispatch_tool("memory_remember_batch", args, Some(reporter))
                .await
                .expect("batch succeeds");
            let events = drain_progress_events(&mut rx);
            assert_eq!(
                events.len(),
                4,
                "expected 4 batch progress events for 51 items, got {}: {events:?}",
                events.len()
            );

            // Expected sequence: 25/51 and 50/51 while embedding, then
            // 51/51 "embedded", then 51/51 "inserted". A missing or
            // non-integer progress field reads as 0.
            let mut progresses: Vec<u64> = Vec::with_capacity(events.len());
            for event in &events {
                progresses.push(event.data["params"]["progress"].as_u64().unwrap_or(0));
            }
            assert_eq!(progresses, vec![25, 50, 51, 51]);

            let final_event = events.last().unwrap();
            assert_eq!(final_event.data["params"]["message"], json!("inserted"));

            // Every event echoes the caller's token and the batch size.
            for event in &events {
                let params = &event.data["params"];
                assert_eq!(params["progressToken"], json!("batch"));
                assert_eq!(params["total"], json!(51));
            }
        });
        harness.shutdown(&test_rt);
    }

    /// v0.11.0 P3: wire-overhead gating (Decision C). A batch of 50
    /// items or fewer publishes no progress events even though a
    /// reporter is wired.
    #[test]
    fn remember_batch_emits_no_progress_when_size_below_threshold() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let session = fresh_progress_session();
            let mut rx = session.subscribe_events();
            let reporter = ProgressReporter::new(session.clone(), ProgressToken(json!("t")));
            // Five items is far under the threshold.
            let items: Vec<serde_json::Value> = (0..5)
                .map(|i| json!({ "content": format!("small-{i}") }))
                .collect();
            let args = json!({ "items": items });
            let _r = harness
                .server
                .dispatch_tool("memory_remember_batch", args, Some(reporter))
                .await
                .expect("batch succeeds");
            let events = drain_progress_events(&mut rx);
            assert!(
                events.is_empty(),
                "expected no progress events for 5-item batch, got {events:?}"
            );
        });
        harness.shutdown(&test_rt);
    }

    /// v0.11.0 P3: a stdio-style call (no session, hence no progress
    /// reporter) must neither panic nor publish events. This pins the
    /// backward-compat invariant the rmcp `call_tool` path relies on.
    ///
    /// `memory_search_docs` is used because it has no embedder
    /// dependency in the dispatch_tests harness; the matching
    /// ingest_document "no progress" guarantee is asserted in
    /// `http::handler_tests` through the same `None` path.
    #[test]
    fn stdio_transport_does_not_emit_progress_events() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            // This session exists only to provide a receiver to watch.
            // The tool call is handed `None`, so nothing may reach it.
            let session = fresh_progress_session();
            let mut rx = session.subscribe_events();
            // A limit above the threshold: progress WOULD fire if a
            // reporter were wired, so silence here is meaningful.
            let args = json!({ "query": "anything", "limit": 200 });
            let _r = harness
                .server
                .dispatch_tool("memory_search_docs", args, None)
                .await
                .expect("search succeeds without reporter");
            let events = drain_progress_events(&mut rx);
            assert!(
                events.is_empty(),
                "stdio path (no reporter) must not publish to ANY session: {events:?}"
            );
        });
        harness.shutdown(&test_rt);
    }

    /// v0.11.0 P3: event ids within one session keep increasing
    /// strictly across consecutive tool calls. Pinned so that a
    /// regression in `SessionState::publish_event`'s id allocator shows
    /// up here.
    #[test]
    fn progress_event_id_monotonic_per_session() {
        let test_rt = rt();
        let harness = Harness::new(&test_rt);
        test_rt.block_on(async {
            let session = fresh_progress_session();
            let mut rx = session.subscribe_events();
            // Both reporters share the session and are created up
            // front; the two searches then run one after the other.
            let first = ProgressReporter::new(session.clone(), ProgressToken(json!("a")));
            let second = ProgressReporter::new(session.clone(), ProgressToken(json!("b")));
            for (query, reporter) in [("q1", first), ("q2", second)] {
                // The call's own result is deliberately ignored; only
                // the emitted events matter here.
                let _ = harness
                    .server
                    .dispatch_tool(
                        "memory_search_docs",
                        json!({ "query": query, "limit": 150 }),
                        Some(reporter),
                    )
                    .await;
            }
            let events = drain_progress_events(&mut rx);
            assert!(events.len() >= 6, "expected at least 6 events: {events:?}");
            let ids: Vec<u64> = events.iter().map(|e| e.id).collect();
            let strictly_increasing = ids.windows(2).all(|pair| pair[0] < pair[1]);
            assert!(
                strictly_increasing,
                "event ids must be strictly monotonic: {ids:?}"
            );
        });
        harness.shutdown(&test_rt);
    }
}

// ===========================================================================
// v0.8.1 P2: MCP audit principal extraction
// ===========================================================================
//
// These tests live in their own module because they manipulate the
// `SOLO_MCP_PRINCIPAL_TOKEN` env var, which is process-global mutable
// state. Serialised via a static `Mutex` so cargo test's multi-threaded
// runner doesn't race. Pattern mirrors the env-guard discipline in
// `solo_cli::commands::common::ollama_overrides_tests`.

#[cfg(test)]
mod principal_extraction_tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard};

    /// Lock held by every test in this module while it mutates
    /// `SOLO_MCP_PRINCIPAL_TOKEN`, so the cases never overlap.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Acquire [`ENV_LOCK`]. A poisoned lock is recovered via
    /// `into_inner` so that one panicking test doesn't take the rest of
    /// the suite down with it.
    fn lock_env() -> MutexGuard<'static, ()> {
        match ENV_LOCK.lock() {
            Ok(held) => held,
            Err(poisoned) => poisoned.into_inner(),
        }
    }

    /// Guard whose `Drop` removes the env var again, so state doesn't
    /// leak into the next case even when a test panics.
    struct EnvGuard;

    impl EnvGuard {
        /// Set the env var to `token` and return the cleanup guard.
        fn set(token: &str) -> Self {
            // SAFETY: ENV_LOCK held by caller.
            unsafe { std::env::set_var(ENV_MCP_PRINCIPAL_TOKEN, token) };
            Self
        }

        /// Remove the env var and return the cleanup guard.
        fn unset() -> Self {
            // SAFETY: ENV_LOCK held by caller.
            unsafe { std::env::remove_var(ENV_MCP_PRINCIPAL_TOKEN) };
            Self
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            // SAFETY: every caller holds ENV_LOCK across construct + drop.
            unsafe { std::env::remove_var(ENV_MCP_PRINCIPAL_TOKEN) };
        }
    }

    /// Stdio path: with `SOLO_MCP_PRINCIPAL_TOKEN` set and no header,
    /// the env value is the resolved principal.
    #[test]
    fn stdio_env_var_resolves_to_principal() {
        let _serial = lock_env();
        let _env = EnvGuard::set("alice-token");
        assert_eq!(resolve_mcp_principal(None).as_deref(), Some("alice-token"));
    }

    /// Stdio path: no env var and no header ⇒ `None` (regression —
    /// must preserve v0.8.0 behaviour for users without auth).
    #[test]
    fn stdio_no_env_var_resolves_to_none() {
        let _serial = lock_env();
        let _env = EnvGuard::unset();
        let principal = resolve_mcp_principal(None);
        assert_eq!(principal, None);
    }

    /// Stdio path: an env var holding only whitespace ⇒ `None`, so a
    /// launcher typo doesn't pin every audit row to a blank principal.
    #[test]
    fn stdio_whitespace_env_var_resolves_to_none() {
        let _serial = lock_env();
        let _env = EnvGuard::set("   \t  ");
        let principal = resolve_mcp_principal(None);
        assert_eq!(principal, None);
    }

    /// HTTP-MCP path: an `Authorization: Bearer <token>` header value
    /// resolves to the token as principal.
    #[test]
    fn http_header_resolves_to_bearer_token_principal() {
        let _serial = lock_env();
        let _env = EnvGuard::unset();
        let header = Some("Bearer api-token-xyz");
        assert_eq!(resolve_mcp_principal(header).as_deref(), Some("api-token-xyz"));
    }

    /// Precedence: with a token in both the env var AND the header,
    /// the header wins (consistent with the rest of the auth stack —
    /// JWT claim beats `X-Solo-Tenant` header).
    #[test]
    fn http_header_beats_env_var() {
        let _serial = lock_env();
        let _env = EnvGuard::set("env-token");
        let principal = resolve_mcp_principal(Some("Bearer header-token"));
        assert_eq!(
            principal.as_deref(),
            Some("header-token"),
            "header MUST win over env var per documented precedence"
        );
    }

    /// HTTP-MCP path: a header without the `Bearer ` prefix is
    /// ignored and resolution falls through to the env var.
    #[test]
    fn http_malformed_header_falls_through_to_env() {
        let _serial = lock_env();
        let _env = EnvGuard::set("env-fallback");
        let header = Some("Basic dXNlcjpwYXNz");
        assert_eq!(resolve_mcp_principal(header).as_deref(), Some("env-fallback"));
    }

    /// HTTP-MCP path: `Bearer ` followed by nothing but whitespace
    /// falls through to the env var. Same spirit as the
    /// whitespace-env-var rejection — a half-formed header gets no
    /// credit.
    #[test]
    fn http_empty_bearer_header_falls_through_to_env() {
        let _serial = lock_env();
        let _env = EnvGuard::set("env-fallback");
        let header = Some("Bearer   ");
        assert_eq!(resolve_mcp_principal(header).as_deref(), Some("env-fallback"));
    }

    /// Repeated calls to `resolve_mcp_principal` under one env-var
    /// setting keep returning the same principal (regression guard: an
    /// accidental thread-local cache would break the "stable across N
    /// tool calls in one session" contract the brief calls out).
    #[test]
    fn stable_across_multiple_resolutions() {
        let _serial = lock_env();
        let _env = EnvGuard::set("stable-token");
        (0..5).for_each(|_| {
            let principal = resolve_mcp_principal(None);
            assert_eq!(principal.as_deref(), Some("stable-token"));
        });
    }
}

/// Tests for the v0.9.0 P2 LLM-config gate applied at MCP initialize
/// time.
///
/// Everything here calls [`initialize_decision`] directly as a pure
/// function: no rmcp Peer is built (its constructors are private) and
/// no MCP handshake runs. How `initialize_decision` is wired to the
/// side-effect path is the job of [`SoloMcpServer::initialize`]; that
/// wiring is covered indirectly by the audit-row tests in
/// [`crate::llm::sampling::tests`], which exercise the same
/// `SamplingLlmClient` + `WriteCommand::EmitLlmSamplingAudit` path
/// that `populate_sampling_steward` constructs.
#[cfg(test)]
mod initialize_decision_tests {
    use super::*;
    use solo_storage::LlmSettings;

    /// No `[llm]` block → Allow either way (matches v0.8.x behaviour).
    #[test]
    fn no_llm_block_allows_initialize_regardless_of_sampling_capability() {
        let decide = |peer_can_sample| initialize_decision(&None, peer_can_sample);
        assert_eq!(decide(false), InitializeDecision::Allow);
        assert_eq!(decide(true), InitializeDecision::Allow);
    }

    /// `[llm] mode = "none"` → Allow either way.
    #[test]
    fn llm_none_allows_initialize_regardless_of_sampling_capability() {
        let settings = Some(LlmSettings::None);
        let decide = |peer_can_sample| initialize_decision(&settings, peer_can_sample);
        assert_eq!(decide(false), InitializeDecision::Allow);
        assert_eq!(decide(true), InitializeDecision::Allow);
    }

    /// `[llm] mode = "anthropic"` → Allow either way.
    #[test]
    fn llm_anthropic_allows_initialize_regardless_of_sampling_capability() {
        let settings = Some(LlmSettings::Anthropic {
            api_key_env: "ANTHROPIC_API_KEY".into(),
            model: "claude-sonnet-4-6".into(),
        });
        let decide = |peer_can_sample| initialize_decision(&settings, peer_can_sample);
        assert_eq!(decide(false), InitializeDecision::Allow);
        assert_eq!(decide(true), InitializeDecision::Allow);
    }

    /// `[llm] mode = "ollama"` → Allow either way.
    #[test]
    fn llm_ollama_allows_initialize_regardless_of_sampling_capability() {
        let settings = Some(LlmSettings::Ollama {
            base_url: "http://localhost:11434".into(),
            model: "qwen3-coder:30b".into(),
        });
        let decide = |peer_can_sample| initialize_decision(&settings, peer_can_sample);
        assert_eq!(decide(false), InitializeDecision::Allow);
        assert_eq!(decide(true), InitializeDecision::Allow);
    }

    /// `[llm] mode = "mcp_sampling"` and the peer has the sampling
    /// capability → populate the slot.
    #[test]
    fn llm_mcp_sampling_with_sampling_capability_populates_slot() {
        let decision = initialize_decision(&Some(LlmSettings::McpSampling), true);
        assert_eq!(decision, InitializeDecision::PopulateSamplingSteward);
    }

    /// `[llm] mode = "mcp_sampling"` and the peer LACKS the sampling
    /// capability → initialize is rejected with the locked BLOCKER 2
    /// error.
    #[test]
    fn llm_mcp_sampling_without_sampling_capability_rejects() {
        let decision = initialize_decision(&Some(LlmSettings::McpSampling), false);
        assert_eq!(
            decision,
            InitializeDecision::RejectMissingSamplingCapability
        );
    }

    /// Pins the key fragments of the locked BLOCKER 2 error message so
    /// a future audit-revision can grep for these strings and confirm
    /// they still land.
    #[test]
    fn sampling_capability_missing_error_message_contains_all_alternatives() {
        let body = sampling_capability_missing_error_message();

        // Banner.
        assert!(body.contains("LLM backend `mcp_sampling`"));

        // The four alternative config blocks.
        assert!(body.contains("mode = \"anthropic\""));
        assert!(body.contains("api_key_env = \"ANTHROPIC_API_KEY\""));
        assert!(body.contains("mode = \"openai\""));
        assert!(body.contains("api_key_env = \"OPENAI_API_KEY\""));
        assert!(body.contains("mode = \"ollama\""));
        assert!(body.contains("base_url = \"http://localhost:11434\""));
        assert!(body.contains("mode = \"none\""));

        // Footer pointing at the release-prep doc.
        assert!(body.contains("docs/releases/v0.9.0.md"));
    }
}

// `fetch_recall_rows`, `RecallHit`, and `RecallRow` used to live here.
// The recall pipeline moved to `solo_query::recall` (commit
// "consolidate-recall"); transports now just call
// `solo_query::run_recall` and format the result.