solo_api/
mcp.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! MCP (Model Context Protocol) server for Solo.
4//!
5//! Exposes fourteen tools to MCP clients (Claude Desktop, Cursor, etc.):
6//!
7//! Episode tools (v0.1+, with v0.9.2 additions):
8//!   - `memory_remember(content, source_type?, source_id?, salience?)` —
9//!     store an episode. Returns the new MemoryId. `salience` (v0.9.2+)
10//!     is optional in [0.0, 1.0] and defaults to 0.5.
11//!   - `memory_remember_batch(items)` (v0.9.2+) — atomically store N
12//!     episodes in one writer-actor transaction. Each item has the
13//!     same fields as `memory_remember`. Returns an ordered array of
14//!     MemoryIds; either all items persist or none do.
15//!   - `memory_recall(query, limit?)` — vector search. Returns the top-K
16//!     matches with content + tier + status.
17//!   - `memory_forget(memory_id, reason?)` — soft-delete an episode.
18//!   - `memory_inspect(memory_id)` — return the full episode record.
19//!
20//! Derived-layer tools (v0.4.0+):
21//!   - `memory_themes(window_days?, limit?)` — list cluster themes.
22//!   - `memory_facts_about(subject, ...)` — query the structured-fact
23//!     knowledge graph (subject-predicate-object triples).
24//!   - `memory_contradictions(limit?)` — disagreements flagged during
25//!     consolidation.
26//!
27//! Derived-layer tools (v0.5.0+):
28//!   - `memory_inspect_cluster(cluster_id, full_content?)` — drill
29//!     into one cluster's abstraction + source episodes (truncated).
30//!
31//! Document tools (v0.7.0+):
32//!   - `memory_ingest_document(path)` — read a file from disk, split it
33//!     into chunks, embed each, and store under documents/document_chunks.
34//!   - `memory_search_docs(query, limit?)` — vector search restricted to
35//!     document chunks; returns chunk content + parent-doc context.
36//!   - `memory_inspect_document(doc_id)` — show one document's metadata
37//!     plus a previewed list of its chunks.
38//!   - `memory_list_documents(limit?, offset?, include_forgotten?)` —
39//!     paginate over ingested documents, newest first.
40//!   - `memory_forget_document(doc_id)` — soft-delete a document; chunks
41//!     stop appearing in `memory_search_docs` and tombstone in HNSW.
42//!
43//! ## Transport
44//!
45//! `serve_stdio` wires the server to stdin/stdout for use as a subprocess
46//! ("`claude_desktop_config.json` or `~/.cursor/mcp.json` invokes
47//! `solo mcp-stdio`"). The function awaits a graceful shutdown when stdin
48//! closes (parent disconnects) — same lifecycle as `solo daemon`'s
49//! Ctrl+C path.
50//!
51//! ## What's deferred
52//!
53//! - SSE/HTTP transports — `rmcp` ships them, but v0.1 ships stdio only.
//! - `prompts/` and `resources/` capabilities — not needed for the
//!   fourteen-tool surface; ServerHandler defaults return empty lists.
56//! - Tool argument validation beyond JSON Schema typing — we trust rmcp
57//!   to deserialize per the schema, then serde-deserialize into our
58//!   typed param structs. Bad inputs surface as clear errors.
59
60use std::sync::Arc;
61
62use rmcp::handler::server::ServerHandler;
63use rmcp::model::{
64    CallToolRequestParams as CallToolRequestParam, CallToolResult, Content, Implementation,
65    InitializeRequestParams, InitializeResult, ListToolsResult,
66    PaginatedRequestParams as PaginatedRequestParam, ProtocolVersion,
67    ServerCapabilities, ServerInfo, Tool,
68};
69use rmcp::service::{RequestContext, RoleServer};
70use rmcp::{ErrorData as McpError, ServiceExt};
71use serde::{Deserialize, Serialize};
72use solo_core::{
73    Confidence, DocumentId, EncodingContext, Episode, MemoryId, Tier,
74};
75use solo_storage::{TenantHandle, TenantRegistry};
76use std::str::FromStr;
77
/// The MCP server. Cheap to clone — the only field is an `Arc`, so a
/// clone is a reference-count bump and every clone shares the same
/// session state.
///
/// v0.8.0 P2: an MCP session resolves to **one tenant**. The tenant
/// handle is supplied to the constructor (today: from the CLI
/// invocation via `solo mcp-stdio --tenant <id>`; future versions
/// may resolve per-bearer-token via OIDC). Subsequent `tools/call`
/// invocations route through the cached handle without re-resolving.
/// Operators that need multi-tenant MCP spawn one `solo mcp-stdio`
/// subprocess per tenant.
#[derive(Clone)]
pub struct SoloMcpServer {
    /// Shared, immutable per-session state (registry, tenant handle,
    /// user aliases, audit principal).
    inner: Arc<Inner>,
}
91
/// Per-session state shared by every clone of `SoloMcpServer`. All
/// fields are set once by `SoloMcpServer::new_for_tenant_with_principal`.
struct Inner {
    /// Multi-tenant registry shared across all sessions. Held so that a
    /// future MCP capability that lists/inspects other tenants has a
    /// path to them (out of scope for v0.8.0 P2). P3 (auth) will use
    /// this to re-resolve the tenant from a bearer-token claim.
    #[allow(dead_code)]
    registry: Arc<TenantRegistry>,
    /// The tenant this MCP session speaks for. Resolved at session
    /// construction time.
    tenant: Arc<TenantHandle>,
    /// Read-path aliases for the canonical `"user"` subject. Sourced
    /// from `solo.config.toml` `[identity] user_aliases`; threaded
    /// through to `solo_query::facts_about` so a query for `"alex"`
    /// also surfaces rows historically extracted as `"user"`. Empty
    /// vec = behave as today (no expansion).
    user_aliases: Vec<String>,
    /// Audit-log principal subject for this MCP session, fixed at
    /// construction. `SoloMcpServer::new_for_tenant` fills it from
    /// `resolve_mcp_principal` (the `SOLO_MCP_PRINCIPAL_TOKEN` env var
    /// on the stdio transport); `new_for_tenant_with_principal` stores
    /// whatever the caller passes (v0.8.0 P4: `Some("bearer")` when the
    /// daemon was started with `--bearer-token-file`). `None` means
    /// the session is unattributed. Persisted here so it can be
    /// threaded into audit emits (and cloned into the sampling
    /// Steward) without reconstructing it per call.
    audit_principal: Option<String>,
}
116
/// v0.9.0 P2: outcome of inspecting the tenant's `[llm]` config + the
/// peer's `sampling` capability at MCP `initialize` time. Produced by
/// [`initialize_decision`].
///
/// Separating the decision from the actual slot write makes the
/// gating logic unit-testable without needing a real
/// `rmcp::Peer<RoleServer>` (whose constructors are private).
/// `SoloMcpServer::initialize` performs the match and routes to the
/// side-effect path; tests pin the table directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InitializeDecision {
    /// Tenant's LLM backend doesn't require an MCP peer; the slot was
    /// populated eagerly at registry-open time (or stays `None` for
    /// `LlmConfig::None`). MCP initialize succeeds without writing the
    /// slot. Also the outcome when the tenant has no `[llm]` settings
    /// at all.
    Allow,
    /// Tenant's LLM backend is `mcp_sampling` AND the peer advertised
    /// the `sampling` capability. `populate_sampling_steward` writes a
    /// peer-bound Steward into the slot.
    PopulateSamplingSteward,
    /// Tenant's LLM backend is `mcp_sampling` but the peer did NOT
    /// advertise the `sampling` capability. MCP initialize must refuse
    /// with the locked BLOCKER 2 error message
    /// (`sampling_capability_missing_error_message`).
    RejectMissingSamplingCapability,
}
141
142/// v0.9.0 P2: decide the initialize outcome given the tenant's
143/// `[llm]` config and whether the peer advertised the `sampling`
144/// capability.
145///
146/// Pure function — no side effects, no rmcp peer required. Pinned by
147/// `initialize_decision_*` tests.
148pub fn initialize_decision(
149    llm_settings: &Option<solo_storage::LlmSettings>,
150    peer_sampling_supported: bool,
151) -> InitializeDecision {
152    match llm_settings {
153        Some(settings) if settings.requires_mcp_peer() => {
154            if peer_sampling_supported {
155                InitializeDecision::PopulateSamplingSteward
156            } else {
157                InitializeDecision::RejectMissingSamplingCapability
158            }
159        }
160        _ => InitializeDecision::Allow,
161    }
162}
163
/// v0.9.0 P2: the locked, operator-facing error text used by both the
/// daemon-startup rejection guard and the MCP `initialize` capability
/// gate (plan §3 Decision 4 / BLOCKER 2 resolution). The TOML snippets
/// are emitted verbatim so they stay copy-pasteable.
///
/// Kept at module scope so the daemon startup path (in `solo-cli`) and
/// `SoloMcpServer::initialize` share one source of truth — a future
/// audit-revision can grep the locked phrasing without chasing two
/// divergent copies.
///
/// Returns the message lines joined by `\n`, with no trailing newline.
pub fn sampling_capability_missing_error_message() -> String {
    // One entry per output line; empty entries are blank separator lines.
    const LINES: &[&str] = &[
        "LLM backend `mcp_sampling` requires a connected MCP client that",
        "advertises the `sampling` capability at initialize. Either the",
        "current MCP client does not support sampling, or this Solo",
        "process is running in daemon-only mode (no peer to call back).",
        "",
        "Pick one of:",
        "",
        "  # Anthropic (hosted):",
        "  [llm]",
        "  mode = \"anthropic\"",
        "  api_key_env = \"ANTHROPIC_API_KEY\"",
        "  model = \"claude-sonnet-4-6\"",
        "",
        "  # OpenAI (hosted):",
        "  [llm]",
        "  mode = \"openai\"",
        "  api_key_env = \"OPENAI_API_KEY\"",
        "  model = \"gpt-5o\"",
        "",
        "  # Ollama (local daemon):",
        "  [llm]",
        "  mode = \"ollama\"",
        "  base_url = \"http://localhost:11434\"",
        "  model = \"qwen3-coder:30b\"",
        "",
        "  # None (cluster-only; abstractions skipped):",
        "  [llm]",
        "  mode = \"none\"",
        "",
        "See docs/releases/v0.9.0.md \u{00a7}LLM-backend selection for details.",
    ];

    let capacity: usize = LINES.iter().map(|line| line.len() + 1).sum();
    let mut message = String::with_capacity(capacity);
    for (index, line) in LINES.iter().enumerate() {
        // Separator goes between lines only, never after the last one.
        if index != 0 {
            message.push('\n');
        }
        message.push_str(line);
    }
    message
}
208
/// v0.8.1 P2: name of the env var MCP clients set when launching the
/// server process to attribute audit rows on the stdio transport.
/// Read by [`resolve_mcp_principal`]. Closes the v0.8.0 known-issue gap
/// where MCP audit rows always carried `principal_subject = NULL` on
/// the daemon path.
///
/// Precedence (when the future HTTP-MCP transport lands):
///   1. `Authorization: Bearer <token>` header on the HTTP-MCP request
///      (resolved through `AuthConfig::Bearer` validator).
///   2. `SOLO_MCP_PRINCIPAL_TOKEN` env var on the spawned process.
///
/// For the v0.8.x stdio-only world only the env-var path applies; the
/// header path is a no-op (no HTTP transport wired). The constant lives
/// at module scope so external callers (CLI subcommand, tests) reference
/// it by name rather than re-typing the string literal.
pub const ENV_MCP_PRINCIPAL_TOKEN: &str = "SOLO_MCP_PRINCIPAL_TOKEN";
224
225/// v0.8.1 P2: resolve the MCP-session principal at `initialize`-time.
226///
227/// Reads `SOLO_MCP_PRINCIPAL_TOKEN` env var (stdio path); future HTTP-MCP
228/// callers will pass the bearer header value in via the explicit
229/// `header_value` arg. The header beats the env when both are present.
230///
231/// Returns `Some(subject)` on resolution success; `None` when neither
232/// source carries a non-empty value. Empty / whitespace-only values are
233/// treated as absent so an accidentally-set `SOLO_MCP_PRINCIPAL_TOKEN=""`
234/// in a launcher script doesn't pin every audit row to a blank principal.
235///
236/// The current implementation treats the env var value as the principal
237/// subject directly. A future hardening pass can validate against the
238/// daemon's `[auth] bearer.token` config to refuse mismatched tokens —
239/// today the env var is operator-trusted (same trust model as
240/// `SOLO_PASSPHRASE`).
241pub fn resolve_mcp_principal(header_value: Option<&str>) -> Option<String> {
242    // HTTP-MCP path wins when configured.
243    if let Some(h) = header_value {
244        if let Some(token) = h.strip_prefix("Bearer ") {
245            let trimmed = token.trim();
246            if !trimmed.is_empty() {
247                // Header carries the raw bearer token. Same shape as the
248                // stdio env-var path: the *value* is the principal
249                // subject in v0.8.1; v0.8.2+ may validate against a
250                // configured token set and surface the JWT `sub` claim
251                // instead.
252                return Some(trimmed.to_string());
253            }
254        }
255    }
256    // Stdio env-var fallback.
257    match std::env::var(ENV_MCP_PRINCIPAL_TOKEN) {
258        Ok(v) => {
259            let trimmed = v.trim();
260            if trimmed.is_empty() {
261                None
262            } else {
263                Some(trimmed.to_string())
264            }
265        }
266        Err(_) => None,
267    }
268}
269
270impl SoloMcpServer {
271    /// Build a server speaking for `tenant` (v0.8.0 P2 — one MCP session
272    /// ↔ one tenant). The registry is held so future capabilities can
273    /// reach across tenants if needed; today every handler routes
274    /// through `self.inner.tenant`.
275    ///
276    /// v0.8.1 P2: auto-resolves the audit principal from the
277    /// `SOLO_MCP_PRINCIPAL_TOKEN` env var (see [`resolve_mcp_principal`]).
278    /// When neither the env var nor a header is set, the principal stays
279    /// `None` — preserving v0.8.0 behavior for single-user setups.
280    pub fn new_for_tenant(
281        registry: Arc<TenantRegistry>,
282        tenant: Arc<TenantHandle>,
283        user_aliases: Vec<String>,
284    ) -> Self {
285        let principal = resolve_mcp_principal(None);
286        Self::new_for_tenant_with_principal(registry, tenant, user_aliases, principal)
287    }
288
289    /// v0.8.0 P4: like [`Self::new_for_tenant`], but records an explicit
290    /// audit principal subject for every tool dispatch. MCP is
291    /// bearer-only at v0.8.0 — the orchestration layer (today: the
292    /// daemon's `--bearer-token-file` path) decides whether a session
293    /// counts as "bearer-authenticated" and passes `Some("bearer")`;
294    /// CLI / unauth paths pass `None`.
295    ///
296    /// v0.8.1 P2: when the caller passes `audit_principal = None`, the
297    /// env-var auto-resolution still runs (in `new_for_tenant`). Callers
298    /// who want to *explicitly* suppress env-var resolution can call
299    /// this method with `None` after `std::env::remove_var(...)`, or use
300    /// the dedicated test constructor that bypasses env reads.
301    pub fn new_for_tenant_with_principal(
302        registry: Arc<TenantRegistry>,
303        tenant: Arc<TenantHandle>,
304        user_aliases: Vec<String>,
305        audit_principal: Option<String>,
306    ) -> Self {
307        Self {
308            inner: Arc::new(Inner {
309                registry,
310                tenant,
311                user_aliases,
312                audit_principal,
313            }),
314        }
315    }
316}
317
318/// Convenience: run the server over stdio and await its termination.
319/// Returns when stdin closes (parent disconnect) or the runtime exits.
320pub async fn serve_stdio(server: SoloMcpServer) -> anyhow::Result<()> {
321    use rmcp::transport::io::stdio;
322    let (stdin, stdout) = stdio();
323    let running = server.serve((stdin, stdout)).await?;
324    running.waiting().await?;
325    Ok(())
326}
327
328// ---------------------------------------------------------------------------
329// Tool argument schemas
330// ---------------------------------------------------------------------------
331
/// Arguments for the `memory_remember` MCP tool: one episode to store.
/// Only `content` is required; every other field may be omitted on the
/// wire and then deserializes to `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RememberArgs {
    /// Text of the episode to store.
    pub content: String,
    /// Optional source type for the episode.
    #[serde(default)]
    pub source_type: Option<String>,
    /// Optional source identifier for the episode.
    #[serde(default)]
    pub source_id: Option<String>,
    /// v0.9.2 — optional salience in [0.0, 1.0]. `None` → 0.5 (preserves
    /// pre-v0.9.2 behaviour). Out-of-range values are rejected by
    /// [`validate_salience`] before reaching the writer.
    #[serde(default)]
    pub salience: Option<f32>,
}
345
/// v0.9.2 — one item in a `memory_remember_batch` request.
///
/// Mirrors [`RememberArgs`] field-for-field minus the wrapper-tool
/// invariant: callers pass an array of these inside [`RememberBatchArgs`].
/// All items in a batch are persisted in a single `BEGIN IMMEDIATE`
/// transaction (per dev-log 0120 §3 Decision A) so partial-failure
/// scenarios are impossible from the client's perspective — either
/// every item lands or none do.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RememberItem {
    /// Text of the episode to store. Required.
    pub content: String,
    /// Optional source type; omitted on the wire → `None`.
    #[serde(default)]
    pub source_type: Option<String>,
    /// Optional source identifier; omitted on the wire → `None`.
    #[serde(default)]
    pub source_id: Option<String>,
    /// Optional salience in [0.0, 1.0]; `None` → 0.5. See
    /// [`RememberArgs::salience`].
    #[serde(default)]
    pub salience: Option<f32>,
}
366
/// v0.9.2 — args for the new `memory_remember_batch` MCP tool.
///
/// Wraps `Vec<RememberItem>`. The handler validates `items.is_empty()`
/// and `items.len() > MAX_REMEMBER_BATCH_SIZE` before any embedding
/// work; per-item content/salience is validated immediately afterwards.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RememberBatchArgs {
    /// Episodes to store, in request order. Required on the wire (no
    /// serde default).
    pub items: Vec<RememberItem>,
}
376
377/// Validate that an optional salience value is well-formed (NaN-free
378/// and inside `[0.0, 1.0]`). Centralised so both `memory_remember` and
379/// `memory_remember_batch` share the same rejection shape.
380fn validate_salience(salience: Option<f32>) -> std::result::Result<(), McpError> {
381    if let Some(s) = salience {
382        if !s.is_finite() || !(0.0..=1.0).contains(&s) {
383            return Err(McpError::invalid_params(
384                format!("salience must be in [0.0, 1.0]; got {s}"),
385                None,
386            ));
387        }
388    }
389    Ok(())
390}
391
/// Arguments for the `memory_recall` MCP tool (vector search over
/// episodes).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallArgs {
    /// Search query text. Required.
    pub query: String,
    /// Maximum number of matches to return; defaults to
    /// `default_limit()` (5) when omitted.
    #[serde(default = "default_limit")]
    pub limit: usize,
}
398
/// Serde default for the `limit` field of the recall / themes /
/// facts-about / contradictions argument structs.
fn default_limit() -> usize {
    const DEFAULT_LIMIT: usize = 5;
    DEFAULT_LIMIT
}
402
/// Arguments for the `memory_forget` MCP tool (soft-delete an episode).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForgetArgs {
    /// Id of the episode to forget, as a string. Required.
    pub memory_id: String,
    /// Free-text reason for the deletion; defaults to
    /// `"user-initiated via MCP"` when omitted.
    #[serde(default = "default_forget_reason")]
    pub reason: String,
}
409
/// Serde default for [`ForgetArgs::reason`]: the reason recorded when
/// the client does not supply one.
fn default_forget_reason() -> String {
    String::from("user-initiated via MCP")
}
413
/// Arguments for the `memory_inspect` MCP tool (return the full
/// episode record).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InspectArgs {
    /// Id of the episode to inspect, as a string. Required.
    pub memory_id: String,
}
418
419// Path 1 derived-layer tools (v0.4.0+) — query the Steward's outputs.
420// `solo_query::derived` is the single source of truth; these handlers
421// just translate JSON args to function args and serialise the result
422// vec to JSON for the MCP wire.
423
/// Arguments for the `memory_themes` MCP tool (list cluster themes).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThemesArgs {
    /// Optional time window in days; `None` = unfiltered, return up
    /// to `limit` most-recent themes across all time. `Some(7)` =
    /// "themes from the last week".
    #[serde(default)]
    pub window_days: Option<i64>,
    /// Maximum number of themes to return; defaults to
    /// `default_limit()` (5) when omitted.
    #[serde(default = "default_limit")]
    pub limit: usize,
}
434
/// Arguments for the `memory_facts_about` MCP tool (query the
/// structured-fact knowledge graph of subject-predicate-object triples).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FactsAboutArgs {
    /// Subject id to query — required (predicate-only scans
    /// intentionally not supported).
    pub subject: String,
    /// Optional predicate filter; `None` when omitted.
    #[serde(default)]
    pub predicate: Option<String>,
    /// Optional lower time bound; `None` when omitted.
    /// NOTE(review): presumably epoch milliseconds — confirm against
    /// `solo_query`.
    #[serde(default)]
    pub since_ms: Option<i64>,
    /// Optional upper time bound; `None` when omitted.
    /// NOTE(review): presumably epoch milliseconds — confirm against
    /// `solo_query`.
    #[serde(default)]
    pub until_ms: Option<i64>,
    /// v0.5.1 Priority 8 — widen the query to also match rows where
    /// `subject` appears as the object (e.g. surface "Sam pushes back
    /// on PRs about Maya" under `facts_about(subject="maya")`).
    /// Default `false` preserves v0.5.0 behaviour.
    #[serde(default)]
    pub include_as_object: bool,
    /// Maximum number of facts to return; defaults to
    /// `default_limit()` (5) when omitted.
    #[serde(default = "default_limit")]
    pub limit: usize,
}
455
/// Arguments for the `memory_contradictions` MCP tool (disagreements
/// flagged during consolidation).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContradictionsArgs {
    /// Maximum number of contradictions to return; defaults to
    /// `default_limit()` (5) when omitted.
    #[serde(default = "default_limit")]
    pub limit: usize,
}
461
/// Args for `memory_inspect_cluster` (v0.5.0 Priority 3). `cluster_id`
/// is required; `full_content` is opt-in for the rare power-user case
/// where 200-char-per-episode truncation is too aggressive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InspectClusterArgs {
    /// Id of the cluster to drill into, as a string. Required.
    pub cluster_id: String,
    /// If `true`, episode `content` fields are returned verbatim. If
    /// `false` or omitted (the default), each episode's content is
    /// truncated to `solo_query::EPISODE_TRUNCATE_CHARS` chars with a
    /// trailing `…`.
    #[serde(default)]
    pub full_content: bool,
}
475
476// Document tools (v0.7.0+). Five args structs paired with five handlers.
477// Wire shapes per `docs/dev-log/0083-v0.7.0-implementation-plan.md` §2 P5.
478
/// Arguments for the `memory_ingest_document` MCP tool (v0.7.0+).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IngestDocumentArgs {
    /// Server-side filesystem path to the file to ingest. Must be
    /// readable by the Solo process. The writer parses the file by
    /// extension, splits it into ~500-token chunks, embeds each, and
    /// stores them under `documents` + `document_chunks`.
    pub path: String,
}
487
/// Arguments for the `memory_search_docs` MCP tool (vector search
/// restricted to document chunks).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchDocsArgs {
    /// Search query text. Required.
    pub query: String,
    /// Maximum number of chunks to return; defaults to
    /// `default_search_docs_limit()` (5) when omitted.
    #[serde(default = "default_search_docs_limit")]
    pub limit: usize,
}
494
/// Serde default for [`SearchDocsArgs::limit`].
fn default_search_docs_limit() -> usize {
    const DEFAULT_SEARCH_DOCS_LIMIT: usize = 5;
    DEFAULT_SEARCH_DOCS_LIMIT
}
498
/// Arguments for the `memory_inspect_document` MCP tool (one document's
/// metadata plus a previewed list of its chunks).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InspectDocumentArgs {
    /// Id of the document to inspect, as a string. Required.
    pub doc_id: String,
}
503
/// Arguments for the `memory_list_documents` MCP tool (paginate over
/// ingested documents, newest first). Every field is optional on the
/// wire.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ListDocumentsArgs {
    /// Page size; defaults to `default_list_documents_limit()` (20)
    /// when omitted.
    #[serde(default = "default_list_documents_limit")]
    pub limit: usize,
    /// Number of documents to skip before the page starts; defaults to
    /// 0 when omitted.
    #[serde(default)]
    pub offset: usize,
    /// If `true`, also include documents the user has forgotten. Default
    /// `false` matches the agent-UX expectation that recall + listing
    /// ignore soft-deleted rows.
    #[serde(default)]
    pub include_forgotten: bool,
}
516
/// Serde default for [`ListDocumentsArgs::limit`].
fn default_list_documents_limit() -> usize {
    const DEFAULT_LIST_DOCUMENTS_LIMIT: usize = 20;
    DEFAULT_LIST_DOCUMENTS_LIMIT
}
520
/// Arguments for the `memory_forget_document` MCP tool (soft-delete a
/// document).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForgetDocumentArgs {
    /// Id of the document to forget, as a string. Required.
    pub doc_id: String,
}
525
526// ---------------------------------------------------------------------------
527// ServerHandler implementation
528// ---------------------------------------------------------------------------
529
530impl ServerHandler for SoloMcpServer {
531    fn get_info(&self) -> ServerInfo {
532        // rmcp 1.x: ServerInfo is non-exhaustive AND lives in another crate,
533        // so neither struct-literal nor functional-update syntax (..) is
534        // allowed from outside. Build via mut on a Default::default().
535        let capabilities = ServerCapabilities::builder()
536            .enable_tools()
537            .build();
538        let mut info = ServerInfo::default();
539        info.protocol_version = ProtocolVersion::default();
540        info.capabilities = capabilities;
541        // v0.9.1 P1 Fix 1 — `Implementation::from_build_env()` reads
542        // `CARGO_PKG_NAME` + `CARGO_PKG_VERSION` from rmcp's OWN build
543        // environment (the helper lives in rmcp, so the proc-macro
544        // expansion captures rmcp's manifest, not ours). On v0.9.0 every
545        // Solo MCP daemon self-identified as `{name: "rmcp", version: "1.7.0"}`.
546        // Pinned by `tests::server_info_identity_is_solo_not_rmcp_or_solo_api`.
547        // The literal `"solo"` (not `env!("CARGO_PKG_NAME")`) is deliberate:
548        // this crate is `solo-api`, but the operator-facing identity is
549        // the binary name `solo`.
550        info.server_info = Implementation::new(
551            "solo".to_string(),
552            env!("CARGO_PKG_VERSION").to_string(),
553        );
554        info.instructions = Some(
555            "Solo gives you persistent memory across conversations \
556                 with this user — what they've told you before, the \
557                 people and projects in their life, and where their \
558                 stated beliefs have shifted, plus a library of \
559                 documents the user has ingested (notes, runbooks, \
560                 PDFs). Reach for these tools whenever the user \
561                 references something from earlier (\"like I \
562                 mentioned\", \"the project I'm working on\", \"my \
563                 friend Alex\", \"the notes I uploaded last week\") \
564                 or asks a question that hinges on personal context \
565                 or document content you don't have in the current \
566                 chat. \
567                 \n\nTools to write or look up specific moments: \
568                 memory_remember (save something worth keeping), \
569                 memory_recall (search past conversations by topic), \
570                 memory_inspect (show one saved item by id), \
571                 memory_forget (delete one saved item). \
572                 \n\nTools for the bigger picture (populated as the \
573                 user uses Solo over time): memory_themes (recent \
574                 topics they've been thinking about), \
575                 memory_facts_about (what you know about a person, \
576                 project, or place — \"what do you know about \
577                 Alex?\"), memory_contradictions (places where the \
578                 user has said two things that disagree — surface \
579                 these before answering), memory_inspect_cluster \
580                 (the raw conversations behind one summary). \
581                 \n\nTools for the user's documents: \
582                 memory_ingest_document (read a file from disk and \
583                 add it to Solo's library), memory_search_docs \
584                 (search across ingested documents by topic — use \
585                 when the user asks about something they wrote down \
586                 or saved as a file), memory_inspect_document (show \
587                 one document's metadata plus a preview of its \
588                 chunks), memory_list_documents (browse documents \
589                 by recency), memory_forget_document (drop a \
590                 document from the library)."
591                .into(),
592        );
593        info
594    }
595
596    /// v0.9.0 P2: override `initialize` so we can:
597    ///
598    ///   1. Cache the client's `InitializeRequestParams` on the peer
599    ///      (delegates to rmcp's default for this).
600    ///   2. If the tenant's `[llm] mode = "mcp_sampling"`:
601    ///      a. Refuse to initialize when the peer didn't advertise the
602    ///         `sampling` capability — surfaces the BLOCKER 2-locked
603    ///         error message so the user sees commented-out
604    ///         alternative TOML blocks.
605    ///      b. Otherwise build a `SamplingLlmClient`-backed Steward and
606    ///         write it into `tenant.steward_slot()` so the writer
607    ///         actor's next consolidate-tick reads a populated slot.
608    ///   3. For any other `[llm]` mode, return the configured tools
609    ///      surface unchanged (the slot was eagerly populated at
610    ///      registry-open time by the static StewardFactory).
611    async fn initialize(
612        &self,
613        request: InitializeRequestParams,
614        context: RequestContext<RoleServer>,
615    ) -> std::result::Result<InitializeResult, McpError> {
616        // Defer to rmcp's default for peer-info caching (matches the
617        // `if peer_info().is_none()` shape).
618        if context.peer.peer_info().is_none() {
619            context.peer.set_peer_info(request.clone());
620        }
621
622        let llm_settings =
623            self.inner.tenant.config().llm.as_ref().cloned();
624        let peer_sampling_supported =
625            request.capabilities.sampling.is_some();
626        match initialize_decision(&llm_settings, peer_sampling_supported) {
627            InitializeDecision::Allow => {}
628            InitializeDecision::PopulateSamplingSteward => {
629                // Build the sampling-backed Steward against the live
630                // peer + the per-tenant write handle, then write it
631                // into the slot.
632                self.populate_sampling_steward(&context).await;
633            }
634            InitializeDecision::RejectMissingSamplingCapability => {
635                return Err(McpError::invalid_request(
636                    sampling_capability_missing_error_message(),
637                    None,
638                ));
639            }
640        }
641
642        Ok(self.get_info())
643    }
644
645    async fn list_tools(
646        &self,
647        _request: Option<PaginatedRequestParam>,
648        _context: RequestContext<RoleServer>,
649    ) -> std::result::Result<ListToolsResult, McpError> {
650        Ok(ListToolsResult {
651            tools: build_tools(),
652            next_cursor: None,
653            ..Default::default()
654        })
655    }
656
657    async fn call_tool(
658        &self,
659        request: CallToolRequestParam,
660        _context: RequestContext<RoleServer>,
661    ) -> std::result::Result<CallToolResult, McpError> {
662        let CallToolRequestParam { name, arguments, .. } = request;
663        let args_value = serde_json::Value::Object(arguments.unwrap_or_default());
664        // v0.11.0 P3: stdio transport has no per-session broadcast
665        // channel to publish progress events through (one process =
666        // one tenant = one implicit "session" for the subprocess's
667        // lifetime). Pass `None` — handlers see it and skip the
668        // emission code paths silently.
669        self.dispatch_tool(&name, args_value, None).await
670    }
671}
672
673impl SoloMcpServer {
674    /// v0.9.0 P2: build a sampling-backed `Arc<Steward>` for the
675    /// current MCP session and write it into the tenant's
676    /// `steward_slot`. Called from [`Self::initialize`] when:
677    ///
678    ///   * `tenant.config().llm.requires_mcp_peer()` is true, AND
679    ///   * the peer advertised the `sampling` capability.
680    ///
681    /// Implementation notes:
682    ///
683    ///   * `StewardConfig::from_env()` is parsed best-effort; if the
684    ///     env vars are malformed, we fall back to `default()` and
685    ///     log a warning. This matches `daemon.rs`'s tolerance — a
686    ///     bad env var shouldn't block an MCP session from initialising.
687    ///
688    ///   * The slot is OVERWRITTEN unconditionally — a fresh MCP
689    ///     session always wins. If a prior session's
690    ///     `SamplingLlmClient` had outstanding requests, they error out
691    ///     on the rmcp layer when their peer drops.
692    ///
693    ///   * The cached `audit_principal` is the one the MCP server
694    ///     constructed for this session via `resolve_mcp_principal`.
695    ///     Every `peer.create_message` call from this Steward routes
696    ///     that principal through to the per-tenant
697    ///     `AuditOperation::LlmSamplingCall` row.
698    async fn populate_sampling_steward(
699        &self,
700        context: &RequestContext<RoleServer>,
701    ) {
702        let steward_config = solo_steward::StewardConfig::from_env()
703            .unwrap_or_else(|e| {
704                tracing::warn!(
705                    error = %e,
706                    "v0.9.0 P2: StewardConfig::from_env failed at MCP \
707                     initialize; falling back to defaults"
708                );
709                solo_steward::StewardConfig::default()
710            });
711        // v0.9.0 P5 (M3 wiring): read `[sampling]` from the tenant's
712        // already-parsed `SoloConfig`. `SamplingConfig::default()` lands
713        // when the block is omitted (5s window / 10 max-batch); operator
714        // overrides flow through to `build_sampling_steward` and into
715        // `SamplingCoordinator::with_settings`.
716        let sampling_config = self.inner.tenant.config().sampling.clone();
717        let peer = context.peer.clone();
718        let write_handle = self.inner.tenant.write().clone();
719        let steward = crate::llm::build_sampling_steward(
720            peer,
721            write_handle,
722            self.inner.audit_principal.clone(),
723            steward_config,
724            sampling_config.clone(),
725        );
726        let slot = self.inner.tenant.steward_slot();
727        let mut guard = slot.write().await;
728        *guard = Some(steward);
729        tracing::info!(
730            tenant = %self.inner.tenant.tenant_id(),
731            coalesce_window_ms = sampling_config.coalesce_window_ms,
732            coalesce_max_requests = sampling_config.coalesce_max_requests,
733            "v0.9.0 P5: MCP-sampling Steward attached to tenant.steward_slot \
734             (PeerSamplingClient → SamplingCoordinator → SamplingLlmClient)"
735        );
736    }
737
738    /// Direct tool-dispatch path used by both `call_tool` (the
739    /// ServerHandler trait method, behind the rmcp protocol layer) and
740    /// in-process tests that don't want to spin up a full transport pair.
741    /// Bypasses `RequestContext` (which requires a `Peer` not constructible
742    /// outside rmcp internals).
743    ///
744    /// v0.11.0 P3: `progress` is `Some` only when the HTTP transport
745    /// dispatched the request AND the client opted in via
746    /// `_meta.progressToken`. The three long-running handlers
747    /// (`memory_ingest_document`, `memory_search_docs`,
748    /// `memory_remember_batch`) consult the reporter; the other
749    /// 11 handlers ignore it (backward compat with stdio and with
750    /// HTTP clients that did not opt in).
751    pub async fn dispatch_tool(
752        &self,
753        name: &str,
754        args_value: serde_json::Value,
755        progress: Option<crate::mcp_progress::ProgressReporter>,
756    ) -> std::result::Result<CallToolResult, McpError> {
757        match name {
758            "memory_remember" => {
759                let args: RememberArgs = parse_args(&args_value)?;
760                self.handle_remember(args).await
761            }
762            "memory_remember_batch" => {
763                let args: RememberBatchArgs = parse_args(&args_value)?;
764                self.handle_remember_batch(args, progress).await
765            }
766            "memory_recall" => {
767                let args: RecallArgs = parse_args(&args_value)?;
768                self.handle_recall(args).await
769            }
770            "memory_forget" => {
771                let args: ForgetArgs = parse_args(&args_value)?;
772                self.handle_forget(args).await
773            }
774            "memory_inspect" => {
775                let args: InspectArgs = parse_args(&args_value)?;
776                self.handle_inspect(args).await
777            }
778            "memory_themes" => {
779                let args: ThemesArgs = parse_args(&args_value)?;
780                self.handle_themes(args).await
781            }
782            "memory_facts_about" => {
783                let args: FactsAboutArgs = parse_args(&args_value)?;
784                self.handle_facts_about(args).await
785            }
786            "memory_contradictions" => {
787                let args: ContradictionsArgs = parse_args(&args_value)?;
788                self.handle_contradictions(args).await
789            }
790            "memory_inspect_cluster" => {
791                let args: InspectClusterArgs = parse_args(&args_value)?;
792                self.handle_inspect_cluster(args).await
793            }
794            "memory_ingest_document" => {
795                let args: IngestDocumentArgs = parse_args(&args_value)?;
796                self.handle_ingest_document(args, progress).await
797            }
798            "memory_search_docs" => {
799                let args: SearchDocsArgs = parse_args(&args_value)?;
800                self.handle_search_docs(args, progress).await
801            }
802            "memory_inspect_document" => {
803                let args: InspectDocumentArgs = parse_args(&args_value)?;
804                self.handle_inspect_document(args).await
805            }
806            "memory_list_documents" => {
807                let args: ListDocumentsArgs = parse_args(&args_value)?;
808                self.handle_list_documents(args).await
809            }
810            "memory_forget_document" => {
811                let args: ForgetDocumentArgs = parse_args(&args_value)?;
812                self.handle_forget_document(args).await
813            }
814            other => Err(McpError::invalid_params(
815                format!("unknown tool `{other}`"),
816                None,
817            )),
818        }
819    }
820
    /// List the tools this server exposes. Mirrors `ServerHandler::list_tools`
    /// without requiring a RequestContext.
    ///
    /// Every call rebuilds the catalogue through `build_tools()`, so the
    /// returned `Vec<Tool>` is a fresh, caller-owned value.
    pub fn dispatch_list_tools(&self) -> Vec<Tool> {
        build_tools()
    }
826}
827
828fn parse_args<T: serde::de::DeserializeOwned>(
829    v: &serde_json::Value,
830) -> std::result::Result<T, McpError> {
831    serde_json::from_value(v.clone()).map_err(|e| {
832        McpError::invalid_params(format!("invalid tool arguments: {e}"), None)
833    })
834}
835
836fn solo_to_mcp(e: solo_core::Error) -> McpError {
837    use solo_core::Error;
838    match e {
839        Error::NotFound(msg) => McpError::invalid_params(msg, None),
840        Error::InvalidInput(msg) => McpError::invalid_params(msg, None),
841        Error::Conflict(msg) => McpError::invalid_params(msg, None),
842        other => McpError::internal_error(other.to_string(), None),
843    }
844}
845
846// ---------------------------------------------------------------------------
847// Tool definitions (JSON Schema)
848// ---------------------------------------------------------------------------
849
/// Build the catalogue of tools advertised to MCP clients.
///
/// Each entry is a `Tool::new(name, description, input_schema)` triple:
///
///   * `name` — the identifier matched by `dispatch_tool` and listed, in
///     the same order, by `tool_names()`.
///   * `description` — the text shown to the calling model, written as
///     "when to use this" guidance rather than implementation detail.
///   * `input_schema` — a JSON Schema object literal describing the
///     arguments, passed through `json_schema_object` (which panics if
///     the literal is not a JSON object).
///
/// The vector is rebuilt on every call; nothing here is cached.
fn build_tools() -> Vec<Tool> {
    vec![
        // Episode tools: single-item store.
        Tool::new(
            "memory_remember",
            "Save something the user has told you — a fact, a \
             preference, a name, a date, a context — so you can pick \
             it up next conversation. Use whenever the user mentions \
             something they'd reasonably expect you to recall later \
             (\"I just started at Quotient\", \"my partner is Maya\"). \
             Returns the saved item's id.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "content": {
                        "type": "string",
                        "description": "The text to remember.",
                    },
                    "source_type": {
                        "type": "string",
                        "description": "Optional source-type tag (default: \"user_message\"). See docs/mcp/source-types.md for convention values.",
                    },
                    "source_id": {
                        "type": "string",
                        "description": "Optional upstream id for traceability.",
                    },
                    "salience": {
                        "type": "number",
                        "description": "Optional salience in [0.0, 1.0]; defaults to 0.5. Higher values bias toward recall ranking + retention. v0.9.2+.",
                        "minimum": 0.0,
                        "maximum": 1.0,
                    },
                },
                "required": ["content"],
            })),
        ),
        // v0.9.2 — atomic batched-remember for agentic clients. Wraps
        // every item in one BEGIN IMMEDIATE tx so a single
        // `memory_remember_batch` call either persists all N items or
        // none. Designed for the solo-jarvis turn-flush pattern (per
        // dev-log 0120 §1).
        Tool::new(
            "memory_remember_batch",
            "Save several items atomically in one transaction — either \
             every item lands or none does. Use this when you have a \
             collection of related episodes from one logical step (a \
             conversation turn, a tool-output bundle, an ingest batch) \
             and partial success would leave the user's memory in a \
             confusing half-state. Each item carries the same fields as \
             memory_remember (content + optional source_type, source_id, \
             salience). Returns an ordered array of memory_ids matching \
             the input items. v0.9.2+.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "items": {
                        "type": "array",
                        "description": "Items to remember atomically. Max 200 per call.",
                        "minItems": 1,
                        "maxItems": 200,
                        "items": {
                            "type": "object",
                            "properties": {
                                "content": {
                                    "type": "string",
                                    "description": "The text to remember.",
                                },
                                "source_type": {
                                    "type": "string",
                                    "description": "Optional source-type tag (default: \"user_message\"). See docs/mcp/source-types.md.",
                                },
                                "source_id": {
                                    "type": "string",
                                    "description": "Optional upstream id for traceability.",
                                },
                                "salience": {
                                    "type": "number",
                                    "description": "Optional salience in [0.0, 1.0]; defaults to 0.5.",
                                    "minimum": 0.0,
                                    "maximum": 1.0,
                                },
                            },
                            "required": ["content"],
                        },
                    },
                },
                "required": ["items"],
            })),
        ),
        // Episode tools: search, soft-delete, and full-record lookup.
        Tool::new(
            "memory_recall",
            "Search past conversations with this user by topic or \
             phrase. Returns up to `limit` of the closest matches, \
             best match first. Use when the user references \
             something they said before (\"that book I told you \
             about\", \"the bug we were debugging last week\"). \
             Skips items the user has deleted.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The query text.",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results (default 5).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                },
                "required": ["query"],
            })),
        ),
        Tool::new(
            "memory_forget",
            "Delete one saved item by id. Use when the user asks you \
             to forget something specific (\"forget that I said \
             X\"). The item stops appearing in future recalls. \
             Reversible only via backups.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "memory_id": {
                        "type": "string",
                        "description": "MemoryId to forget (UUID v7).",
                    },
                    "reason": {
                        "type": "string",
                        "description": "Optional free-form reason (logged, not yet persisted).",
                    },
                },
                "required": ["memory_id"],
            })),
        ),
        Tool::new(
            "memory_inspect",
            "Show the full record for one saved item — when it was \
             saved, where it came from, and the full text. Use after \
             memory_recall when you want the complete content of a \
             specific hit (recall results may be truncated).",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "memory_id": {
                        "type": "string",
                        "description": "MemoryId to inspect (UUID v7).",
                    },
                },
                "required": ["memory_id"],
            })),
        ),
        // Path 1 derived-layer tools (v0.4.0+) — query the Steward's
        // outputs. These four are populated by `solo consolidate` and
        // were previously unreadable except via direct SQL.
        Tool::new(
            "memory_themes",
            "Recent topics the user has been thinking about. Use to \
             orient yourself at the start of a conversation, or when \
             the user asks \"what have I been up to\" / \"what was I \
             working on last week\". Pass `window_days` to scope \
             (e.g. 7 for last week); omit for all-time.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "window_days": {
                        "type": "integer",
                        "description": "Optional time window in days. Omit for unfiltered.",
                        "minimum": 1,
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results (default 5).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                },
            })),
        ),
        Tool::new(
            "memory_facts_about",
            "Look up what you remember about a person, project, or \
             topic — names, dates, preferences, relationships. Use \
             when the user asks \"what do you know about Alex?\", \
             \"when did I start at Quotient?\", \"who is Maya?\", or \
             whenever you need grounded facts about someone or \
             something before answering. Subject is required (the \
             person/place/thing you're asking about); narrow further \
             with `predicate` (\"works_at\", \"lives_in\") or a date \
             range. Set `include_as_object=true` to also surface \
             facts where the subject appears on the receiving side of \
             a relationship (e.g. \"Sam pushes back on PRs about \
             Maya\" surfaces under facts_about(subject=\"Maya\", \
             include_as_object=true)). (Backed by \
             subject-predicate-object triples distilled from past \
             conversations.) Clients should set a 30s timeout on this \
             call; if exceeded, retry once or fall back to \
             `memory_recall`.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "subject": {
                        "type": "string",
                        "description": "Subject id to query (e.g. 'Sam').",
                    },
                    "predicate": {
                        "type": "string",
                        "description": "Optional predicate filter (e.g. 'works_at').",
                    },
                    "since_ms": {
                        "type": "integer",
                        "description": "Optional valid_from_ms lower bound (epoch ms).",
                    },
                    "until_ms": {
                        "type": "integer",
                        "description": "Optional valid_to_ms upper bound (epoch ms). NULL upper bounds (still-valid facts) pass through.",
                    },
                    "include_as_object": {
                        "type": "boolean",
                        "description": "If true, also match facts where `subject` appears as the object (e.g. 'Sam pushes back on PRs about Maya' surfaces under subject='Maya'). Default false.",
                        "default": false,
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results (default 5).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                },
                "required": ["subject"],
            })),
        ),
        Tool::new(
            "memory_contradictions",
            "Find places where the user's stated beliefs or facts \
             disagree across conversations — flag disagreements \
             before answering. Use whenever you're about to rely on \
             a remembered fact that could have changed (jobs, \
             relationships, preferences, opinions); a disagreement \
             here means the user has told you both X and not-X over \
             time and you should ask which is current instead of \
             guessing. Each result shows both conflicting statements \
             with the topic.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results (default 5).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                },
            })),
        ),
        Tool::new(
            "memory_inspect_cluster",
            "Show the raw conversations behind one summary. Returns \
             the one-line topic (the LLM-generated summary) and the \
             source conversations the topic was built from. Use \
             after memory_themes when the user asks \"show me the \
             raw context behind this\" or \"why does Solo think \
             that about cluster Y\". Source items are truncated to \
             200 chars unless `full_content` is set.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "cluster_id": {
                        "type": "string",
                        "description": "Cluster id to inspect (from memory_themes hits).",
                    },
                    "full_content": {
                        "type": "boolean",
                        "description": "If true, episode content is returned verbatim. Default false (truncate to 200 chars + ellipsis).",
                    },
                },
                "required": ["cluster_id"],
            })),
        ),
        // Document tools (v0.7.0+). RAG over user-supplied files —
        // markdown notes, PDFs, runbooks, code, etc. Same vector space
        // as episodes; same embedder; same HNSW index.
        Tool::new(
            "memory_ingest_document",
            "Read a file from disk and add it to the user's document \
             library so it becomes searchable alongside past \
             conversations. Use when the user asks you to remember a \
             whole file (\"add my notes/runbook.md\", \"ingest this \
             PDF\"). The file is split into ~500-token chunks and \
             each chunk is embedded; chunks then surface through \
             memory_search_docs. Returns the new document id, chunk \
             count, and a `deduped` flag (true if the same content \
             was already ingested under another id).",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "path": {
                        "type": "string",
                        "description": "Server-side absolute path to the file to ingest. The file must be readable by the Solo process.",
                    },
                },
                "required": ["path"],
            })),
        ),
        Tool::new(
            "memory_search_docs",
            "Search across the user's ingested documents by topic or \
             phrase. Returns up to `limit` matching chunks, best \
             match first, each with the parent document's title + \
             source path so you can cite where the answer came from. \
             Use when the user asks a question that hinges on \
             material they've added as a file (\"what does my \
             runbook say about backups?\", \"find the section in the \
             notes about the new policy\"). Forgotten documents are \
             skipped.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The query text.",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results (default 5).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                },
                "required": ["query"],
            })),
        ),
        Tool::new(
            "memory_inspect_document",
            "Show one document's metadata plus a preview of every \
             chunk it was split into. Use after memory_search_docs \
             when the user wants the bigger picture for one hit \
             (\"show me the whole document this came from\"), or \
             after memory_list_documents to drill into one entry. \
             Each chunk preview is truncated to 200 chars.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "doc_id": {
                        "type": "string",
                        "description": "Document id to inspect (UUID v7).",
                    },
                },
                "required": ["doc_id"],
            })),
        ),
        Tool::new(
            "memory_list_documents",
            "List the user's ingested documents, newest first. Use \
             when the user asks \"what documents have I added?\" or \
             \"show me my files\". Returns a paginated index — pass \
             `offset` to page further back. Forgotten documents are \
             hidden by default; set `include_forgotten=true` to see \
             them too.",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "limit": {
                        "type": "integer",
                        "description": "Maximum results per page (default 20).",
                        "minimum": 1,
                        "maximum": 100,
                    },
                    "offset": {
                        "type": "integer",
                        "description": "Number of rows to skip (for paging). Default 0.",
                        "minimum": 0,
                    },
                    "include_forgotten": {
                        "type": "boolean",
                        "description": "If true, also include documents the user has forgotten. Default false.",
                    },
                },
            })),
        ),
        Tool::new(
            "memory_forget_document",
            "Drop one document from the user's library by id. Use \
             when the user asks you to forget a specific file \
             (\"forget my old runbook\"). The document's chunks stop \
             appearing in memory_search_docs and the vectors are \
             tombstoned in the index. The chunk rows themselves are \
             kept for forensic value (a future restore command can \
             undo this).",
            json_schema_object(serde_json::json!({
                "type": "object",
                "properties": {
                    "doc_id": {
                        "type": "string",
                        "description": "Document id to forget (UUID v7).",
                    },
                },
                "required": ["doc_id"],
            })),
        ),
    ]
}
1251
1252fn json_schema_object(value: serde_json::Value) -> serde_json::Map<String, serde_json::Value> {
1253    match value {
1254        serde_json::Value::Object(map) => map,
1255        _ => panic!("json_schema_object: input must be an object"),
1256    }
1257}
1258
/// Every tool name this server registers, listed in registration order.
///
/// Intended for cross-crate consumers (notably `solo doctor
/// --check-mcp-compat`) that only need the names and should not have to
/// build full `rmcp::Tool` records, each of which allocates a JSON schema.
/// The order here follows `build_tools()`; drift between the two lists
/// would be caught by the cross-provider regex test, which iterates
/// `build_tools()`.
pub fn tool_names() -> Vec<&'static str> {
    const REGISTERED: [&str; 14] = [
        // Episode tools (`memory_remember_batch` was added in v0.9.2 for
        // agentic clients such as solo-jarvis).
        "memory_remember",
        "memory_remember_batch",
        "memory_recall",
        "memory_forget",
        "memory_inspect",
        // Derived-layer tools.
        "memory_themes",
        "memory_facts_about",
        "memory_contradictions",
        "memory_inspect_cluster",
        // Document tools added in v0.7.0.
        "memory_ingest_document",
        "memory_search_docs",
        "memory_inspect_document",
        "memory_list_documents",
        "memory_forget_document",
    ];
    REGISTERED.to_vec()
}
1287
1288// ---------------------------------------------------------------------------
1289// Tool handlers
1290// ---------------------------------------------------------------------------
1291
1292impl SoloMcpServer {
1293    async fn handle_remember(
1294        &self,
1295        args: RememberArgs,
1296    ) -> std::result::Result<CallToolResult, McpError> {
1297        let content = args.content.trim_end().to_string();
1298        if content.is_empty() {
1299            return Err(McpError::invalid_params(
1300                "memory_remember: content must not be empty".to_string(),
1301                None,
1302            ));
1303        }
1304        validate_salience(args.salience)?;
1305        let embedding: solo_core::Embedding = self
1306            .inner
1307            .tenant
1308            .embedder()
1309            .embed(&content)
1310            .await
1311            .map_err(solo_to_mcp)?;
1312        let episode = Episode {
1313            memory_id: MemoryId::new(),
1314            ts_ms: chrono::Utc::now().timestamp_millis(),
1315            source_type: args.source_type.unwrap_or_else(|| "user_message".into()),
1316            source_id: args.source_id,
1317            content,
1318            encoding_context: EncodingContext::default(),
1319            provenance: None,
1320            confidence: Confidence::new(0.9).unwrap(),
1321            strength: 0.5,
1322            // v0.9.2: caller-supplied salience overrides the default. The
1323            // `validate_salience` call above has already rejected NaN /
1324            // out-of-range values.
1325            salience: args.salience.unwrap_or(0.5),
1326            tier: Tier::Hot,
1327        };
1328        let mid = self
1329            .inner
1330            .tenant
1331            .write()
1332            .remember_as(self.inner.audit_principal.clone(), episode, embedding)
1333            .await
1334            .map_err(solo_to_mcp)?;
1335        Ok(CallToolResult::success(vec![Content::text(format!(
1336            "remembered {mid}"
1337        ))]))
1338    }
1339
1340    /// v0.9.2 — handler for `memory_remember_batch`.
1341    ///
1342    /// Pipeline (mirrors `handle_remember` over N items):
1343    ///   1. Validate batch (non-empty, ≤ `MAX_REMEMBER_BATCH_SIZE`,
1344    ///      per-item content non-empty, per-item salience in [0.0, 1.0]).
1345    ///   2. Embed all items sequentially via the tenant's embedder.
1346    ///      We don't `join_all` here because the in-process embedder
1347    ///      paths today (stub, local-Anthropic, OpenAI) are individually
1348    ///      fast and serial is robust against rate-limit surprises (per
1349    ///      dev-log 0120 §8 R2 mitigation: existing embedder
1350    ///      throttling guards parallel fan-out; serial gives identical
1351    ///      semantics with simpler error paths). Parallel fan-out is a
1352    ///      v0.9.3 optimization once the batch tool has live traffic.
1353    ///   3. Build `Vec<(Episode, Embedding)>` with default Confidence /
1354    ///      strength / tier — same shape as single-Remember.
1355    ///   4. Dispatch via `WriteHandle::remember_batch_as`, which wraps
1356    ///      every INSERT in ONE `BEGIN IMMEDIATE` tx (ADR-0003 invariant
1357    ///      preserved).
1358    ///   5. Reply is `Vec<MemoryId>` in input order; serialise to JSON.
1359    async fn handle_remember_batch(
1360        &self,
1361        args: RememberBatchArgs,
1362        progress: Option<crate::mcp_progress::ProgressReporter>,
1363    ) -> std::result::Result<CallToolResult, McpError> {
1364        // 1. Batch-shape validation. The writer-actor will re-check
1365        //    `MAX_REMEMBER_BATCH_SIZE` (dev-log 0120 §3 Decision F) and
1366        //    reject with `InvalidInput` — we mirror the check here to
1367        //    avoid the round-trip into the writer + the embedder calls
1368        //    when the request is obviously over-cap.
1369        if args.items.is_empty() {
1370            return Err(McpError::invalid_params(
1371                "memory_remember_batch: items must not be empty".to_string(),
1372                None,
1373            ));
1374        }
1375        if args.items.len() > solo_storage::MAX_REMEMBER_BATCH_SIZE {
1376            return Err(McpError::invalid_params(
1377                format!(
1378                    "memory_remember_batch: {} items exceeds MAX_REMEMBER_BATCH_SIZE = {}",
1379                    args.items.len(),
1380                    solo_storage::MAX_REMEMBER_BATCH_SIZE,
1381                ),
1382                None,
1383            ));
1384        }
1385        for (i, item) in args.items.iter().enumerate() {
1386            if item.content.trim_end().is_empty() {
1387                return Err(McpError::invalid_params(
1388                    format!("memory_remember_batch: items[{i}].content must not be empty"),
1389                    None,
1390                ));
1391            }
1392            validate_salience(item.salience).map_err(|e| {
1393                // Re-wrap with the index so the caller can pinpoint
1394                // which item tripped the validator.
1395                McpError::invalid_params(
1396                    format!("memory_remember_batch: items[{i}].{}", e.message),
1397                    None,
1398                )
1399            })?;
1400        }
1401
1402        // v0.11.0 P3: progress emission is gated on batch size — below
1403        // the threshold (50 items) the wire-overhead of progress
1404        // notifications outweighs the UX benefit. Above threshold +
1405        // client opted in (`reporter.is_some()`), emit one event per
1406        // `MCP_REMEMBER_BATCH_PROGRESS_EMIT_EVERY` items during the
1407        // embed loop + one terminal "embedded" + one "inserted" event.
1408        let total = args.items.len() as u64;
1409        let progress_active = progress.is_some()
1410            && args.items.len() > crate::mcp_progress::MCP_REMEMBER_BATCH_PROGRESS_ITEM_THRESHOLD;
1411        let progress_reporter = if progress_active { progress.as_ref() } else { None };
1412
1413        // 2. Embed each item. Serial fan-out (see doc comment above).
1414        let embedder = self.inner.tenant.embedder();
1415        let now_ms = chrono::Utc::now().timestamp_millis();
1416        let mut pairs: Vec<(Episode, solo_core::Embedding)> = Vec::with_capacity(args.items.len());
1417        for (i, item) in args.items.into_iter().enumerate() {
1418            let content = item.content.trim_end().to_string();
1419            let embedding = embedder.embed(&content).await.map_err(solo_to_mcp)?;
1420            let episode = Episode {
1421                memory_id: MemoryId::new(),
1422                ts_ms: now_ms,
1423                source_type: item.source_type.unwrap_or_else(|| "user_message".into()),
1424                source_id: item.source_id,
1425                content,
1426                encoding_context: EncodingContext::default(),
1427                provenance: None,
1428                confidence: Confidence::new(0.9).unwrap(),
1429                strength: 0.5,
1430                salience: item.salience.unwrap_or(0.5),
1431                tier: Tier::Hot,
1432            };
1433            pairs.push((episode, embedding));
1434            // v0.11.0 P3 checkpoint A — embed progress, every N items.
1435            // `(i + 1) % EMIT_EVERY == 0` emits at items 25, 50, 75, ...
1436            // The terminal "embedded" event below covers any remainder.
1437            let done = (i + 1) as u64;
1438            if (i + 1) % crate::mcp_progress::MCP_REMEMBER_BATCH_PROGRESS_EMIT_EVERY == 0 {
1439                crate::mcp_progress::report_if_some(
1440                    progress_reporter,
1441                    done,
1442                    Some(total),
1443                    Some("embedding"),
1444                );
1445            }
1446        }
1447
1448        // v0.11.0 P3 checkpoint B — all items embedded; about to land
1449        // in writer-actor. Always-emitted (when progress_active) so a
1450        // batch that wasn't a multiple of EMIT_EVERY still gets a
1451        // final embed-phase event.
1452        crate::mcp_progress::report_if_some(
1453            progress_reporter,
1454            total,
1455            Some(total),
1456            Some("embedded"),
1457        );
1458
1459        // 3. Dispatch into the writer-actor. The batch lands as one tx.
1460        let memory_ids = self
1461            .inner
1462            .tenant
1463            .write()
1464            .remember_batch_as(self.inner.audit_principal.clone(), pairs)
1465            .await
1466            .map_err(solo_to_mcp)?;
1467
1468        // v0.11.0 P3 checkpoint C — writer-actor committed. The reply
1469        // body below also lands in the POST response, but this event
1470        // gives a client subscribed to the GET stream early confirmation
1471        // that the row is committed without waiting for the POST to
1472        // return (network buffering can stall the POST response
1473        // marginally; the SSE event is immediate).
1474        crate::mcp_progress::report_if_some(
1475            progress_reporter,
1476            total,
1477            Some(total),
1478            Some("inserted"),
1479        );
1480
1481        // 4. Reply: JSON-serialised array of memory ids in input order.
1482        //    Stringified so MCP clients see UUID strings (matches single
1483        //    `memory_remember`'s reply shape — both speak strings on
1484        //    the wire).
1485        let ids_as_strings: Vec<String> =
1486            memory_ids.iter().map(|m| m.to_string()).collect();
1487        let body = serde_json::to_string(&ids_as_strings).map_err(|e| {
1488            McpError::internal_error(format!("serialize batch reply: {e}"), None)
1489        })?;
1490        Ok(CallToolResult::success(vec![Content::text(body)]))
1491    }
1492
1493    async fn handle_recall(
1494        &self,
1495        args: RecallArgs,
1496    ) -> std::result::Result<CallToolResult, McpError> {
1497        // Pipeline lives in solo-query; the transport just formats the
1498        // result. solo_query::run_recall validates empty queries
1499        // (returns InvalidInput → invalid_params via solo_to_mcp).
1500        let result = solo_query::run_recall(
1501            self.inner.tenant.as_ref(),
1502            self.inner.audit_principal.clone(),
1503            &args.query,
1504            args.limit,
1505        )
1506        .await
1507        .map_err(solo_to_mcp)?;
1508
1509        if result.hits.is_empty() {
1510            return Ok(CallToolResult::success(vec![Content::text(format!(
1511                "no matches (index has {} vectors)",
1512                result.index_len
1513            ))]));
1514        }
1515        let body = serde_json::to_string_pretty(&result.hits).unwrap_or_else(|_| String::new());
1516        Ok(CallToolResult::success(vec![Content::text(body)]))
1517    }
1518
1519    async fn handle_forget(
1520        &self,
1521        args: ForgetArgs,
1522    ) -> std::result::Result<CallToolResult, McpError> {
1523        let mid = MemoryId::from_str(&args.memory_id).map_err(|e| {
1524            McpError::invalid_params(format!("invalid memory_id: {e}"), None)
1525        })?;
1526        self.inner
1527            .tenant
1528            .write()
1529            .forget_as(self.inner.audit_principal.clone(), mid, args.reason)
1530            .await
1531            .map_err(solo_to_mcp)?;
1532        Ok(CallToolResult::success(vec![Content::text(format!(
1533            "forgotten {mid}"
1534        ))]))
1535    }
1536
1537    async fn handle_inspect(
1538        &self,
1539        args: InspectArgs,
1540    ) -> std::result::Result<CallToolResult, McpError> {
1541        let mid = MemoryId::from_str(&args.memory_id).map_err(|e| {
1542            McpError::invalid_params(format!("invalid memory_id: {e}"), None)
1543        })?;
1544        // Pipeline lives in solo-query::inspect; transports just format.
1545        let row = solo_query::inspect_one(
1546            self.inner.tenant.read(),
1547            self.inner.tenant.audit(),
1548            self.inner.audit_principal.clone(),
1549            mid,
1550        )
1551        .await
1552        .map_err(solo_to_mcp)?;
1553        let body = serde_json::to_string_pretty(&row).unwrap_or_else(|_| String::new());
1554        Ok(CallToolResult::success(vec![Content::text(body)]))
1555    }
1556
1557    // Path 1 derived-layer handlers (v0.4.0+). Each one delegates to a
1558    // single solo-query::derived pipeline and serialises the result Vec
1559    // to pretty JSON for the MCP wire. Empty result → JSON empty array
1560    // `[]` (not a special-case "no matches" string) so MCP clients can
1561    // parse uniformly.
1562
1563    async fn handle_themes(
1564        &self,
1565        args: ThemesArgs,
1566    ) -> std::result::Result<CallToolResult, McpError> {
1567        let hits = solo_query::themes(
1568            self.inner.tenant.read(),
1569            self.inner.tenant.audit(),
1570            self.inner.audit_principal.clone(),
1571            args.window_days,
1572            args.limit,
1573        )
1574        .await
1575        .map_err(solo_to_mcp)?;
1576        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1577        Ok(CallToolResult::success(vec![Content::text(body)]))
1578    }
1579
1580    async fn handle_facts_about(
1581        &self,
1582        args: FactsAboutArgs,
1583    ) -> std::result::Result<CallToolResult, McpError> {
1584        if args.subject.trim().is_empty() {
1585            return Err(McpError::invalid_params(
1586                "memory_facts_about: subject must not be empty".to_string(),
1587                None,
1588            ));
1589        }
1590        let hits = solo_query::facts_about(
1591            self.inner.tenant.read(),
1592            self.inner.tenant.audit(),
1593            self.inner.audit_principal.clone(),
1594            &args.subject,
1595            &self.inner.user_aliases,
1596            args.include_as_object,
1597            args.predicate.as_deref(),
1598            args.since_ms,
1599            args.until_ms,
1600            args.limit,
1601        )
1602        .await
1603        .map_err(solo_to_mcp)?;
1604        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1605        Ok(CallToolResult::success(vec![Content::text(body)]))
1606    }
1607
1608    async fn handle_contradictions(
1609        &self,
1610        args: ContradictionsArgs,
1611    ) -> std::result::Result<CallToolResult, McpError> {
1612        let hits = solo_query::contradictions(
1613            self.inner.tenant.read(),
1614            self.inner.tenant.audit(),
1615            self.inner.audit_principal.clone(),
1616            args.limit,
1617        )
1618        .await
1619        .map_err(solo_to_mcp)?;
1620        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1621        Ok(CallToolResult::success(vec![Content::text(body)]))
1622    }
1623
1624    async fn handle_inspect_cluster(
1625        &self,
1626        args: InspectClusterArgs,
1627    ) -> std::result::Result<CallToolResult, McpError> {
1628        if args.cluster_id.trim().is_empty() {
1629            return Err(McpError::invalid_params(
1630                "memory_inspect_cluster: cluster_id must not be empty".to_string(),
1631                None,
1632            ));
1633        }
1634        // `solo_to_mcp` maps `Error::NotFound` → `invalid_params` for
1635        // MCP (the protocol does not have a separate "not found" error
1636        // shape; clients see the message verbatim, which includes the
1637        // cluster_id).
1638        let record = solo_query::inspect_cluster(
1639            self.inner.tenant.read(),
1640            self.inner.tenant.audit(),
1641            self.inner.audit_principal.clone(),
1642            &args.cluster_id,
1643            args.full_content,
1644        )
1645        .await
1646        .map_err(solo_to_mcp)?;
1647        let body = serde_json::to_string_pretty(&record).unwrap_or_else(|_| String::new());
1648        Ok(CallToolResult::success(vec![Content::text(body)]))
1649    }
1650
1651    // Document handlers (v0.7.0+). Each wraps the corresponding writer
1652    // / query API; the MCP wire shape is plain JSON serialisation of
1653    // the returned report / records.
1654
1655    async fn handle_ingest_document(
1656        &self,
1657        args: IngestDocumentArgs,
1658        progress: Option<crate::mcp_progress::ProgressReporter>,
1659    ) -> std::result::Result<CallToolResult, McpError> {
1660        if args.path.trim().is_empty() {
1661            return Err(McpError::invalid_params(
1662                "memory_ingest_document: path must not be empty".to_string(),
1663                None,
1664            ));
1665        }
1666        let path = std::path::PathBuf::from(args.path);
1667        // Defaults match what the daemon uses today (target 500 tokens,
1668        // 50-token overlap). Future: thread a per-call override through
1669        // the args struct if a use case appears.
1670        let chunk_config = solo_storage::document::ChunkConfig::default();
1671
1672        // v0.11.0 P3: ingest checkpoints. The writer-actor's
1673        // `ingest_document_as` is one opaque command that internally
1674        // performs parse → chunk → embed → SQL insert; we bookend it
1675        // with phase-marker progress events. The 4-phase taxonomy
1676        // matches the MCP spec brief — `total=4`, `progress` walks 1
1677        // → 4 — even though phases 1 and 2 (parse, chunk) emit before
1678        // the writer call and 3 and 4 (embed, insert) emit after.
1679        // Real chunk-by-chunk progress would require redesigning the
1680        // writer command shape (cross-cuts ADR-0003); P3's bookend
1681        // pattern stays additive without touching the writer.
1682        const INGEST_TOTAL_PHASES: u64 = 4;
1683        crate::mcp_progress::report_if_some(
1684            progress.as_ref(),
1685            1,
1686            Some(INGEST_TOTAL_PHASES),
1687            Some("parsed"),
1688        );
1689        crate::mcp_progress::report_if_some(
1690            progress.as_ref(),
1691            2,
1692            Some(INGEST_TOTAL_PHASES),
1693            Some("chunked"),
1694        );
1695
1696        let report = self
1697            .inner
1698            .tenant
1699            .write()
1700            .ingest_document_as(self.inner.audit_principal.clone(), path, chunk_config)
1701            .await
1702            .map_err(solo_to_mcp)?;
1703
1704        crate::mcp_progress::report_if_some(
1705            progress.as_ref(),
1706            3,
1707            Some(INGEST_TOTAL_PHASES),
1708            Some("embedded"),
1709        );
1710        // Final event includes the real chunk count from the report;
1711        // the per-event `message` field carries it so clients can
1712        // surface "N chunks indexed" without parsing the POST reply
1713        // body.
1714        crate::mcp_progress::report_if_some(
1715            progress.as_ref(),
1716            INGEST_TOTAL_PHASES,
1717            Some(INGEST_TOTAL_PHASES),
1718            Some(&format!("inserted {} chunks", report.chunks_persisted)),
1719        );
1720
1721        let body = serde_json::to_string_pretty(&report).unwrap_or_else(|_| String::new());
1722        Ok(CallToolResult::success(vec![Content::text(body)]))
1723    }
1724
1725    async fn handle_search_docs(
1726        &self,
1727        args: SearchDocsArgs,
1728        progress: Option<crate::mcp_progress::ProgressReporter>,
1729    ) -> std::result::Result<CallToolResult, McpError> {
1730        // v0.11.0 P3: progress emission for search is gated on `top_k`
1731        // (passed via `args.limit`) — below 100 the search completes
1732        // fast enough that progress notifications add wire-overhead
1733        // with no UX benefit (Decision C). Above threshold + client
1734        // opted in, emit 3 phase-marker events around the query call.
1735        let top_k = args.limit as u32;
1736        let progress_active = progress.is_some()
1737            && top_k > crate::mcp_progress::MCP_SEARCH_DOCS_PROGRESS_TOP_K_THRESHOLD;
1738        let progress_reporter = if progress_active { progress.as_ref() } else { None };
1739        const SEARCH_TOTAL_PHASES: u64 = 3;
1740        crate::mcp_progress::report_if_some(
1741            progress_reporter,
1742            1,
1743            Some(SEARCH_TOTAL_PHASES),
1744            Some("hnsw_lookup"),
1745        );
1746
1747        // `solo_query::run_doc_search` validates empty queries (returns
1748        // InvalidInput → invalid_params via solo_to_mcp) and clamps
1749        // limit upstream of the embedder call.
1750        let hits = solo_query::run_doc_search(
1751            self.inner.tenant.as_ref(),
1752            self.inner.audit_principal.clone(),
1753            &args.query,
1754            args.limit,
1755        )
1756        .await
1757        .map_err(solo_to_mcp)?;
1758
1759        crate::mcp_progress::report_if_some(
1760            progress_reporter,
1761            2,
1762            Some(SEARCH_TOTAL_PHASES),
1763            Some("reranked"),
1764        );
1765        crate::mcp_progress::report_if_some(
1766            progress_reporter,
1767            SEARCH_TOTAL_PHASES,
1768            Some(SEARCH_TOTAL_PHASES),
1769            Some(&format!("returning {} hits", hits.len())),
1770        );
1771
1772        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1773        Ok(CallToolResult::success(vec![Content::text(body)]))
1774    }
1775
1776    async fn handle_inspect_document(
1777        &self,
1778        args: InspectDocumentArgs,
1779    ) -> std::result::Result<CallToolResult, McpError> {
1780        let doc_id = DocumentId::from_str(&args.doc_id).map_err(|e| {
1781            McpError::invalid_params(format!("invalid doc_id: {e}"), None)
1782        })?;
1783        let result_opt = solo_query::inspect_document(
1784            self.inner.tenant.read(),
1785            self.inner.tenant.audit(),
1786            self.inner.audit_principal.clone(),
1787            &doc_id,
1788        )
1789        .await
1790        .map_err(solo_to_mcp)?;
1791        match result_opt {
1792            Some(record) => {
1793                let body =
1794                    serde_json::to_string_pretty(&record).unwrap_or_else(|_| String::new());
1795                Ok(CallToolResult::success(vec![Content::text(body)]))
1796            }
1797            None => Err(McpError::invalid_params(
1798                format!("document {doc_id} not found"),
1799                None,
1800            )),
1801        }
1802    }
1803
1804    async fn handle_list_documents(
1805        &self,
1806        args: ListDocumentsArgs,
1807    ) -> std::result::Result<CallToolResult, McpError> {
1808        let rows = solo_query::list_documents(
1809            self.inner.tenant.read(),
1810            self.inner.tenant.audit(),
1811            self.inner.audit_principal.clone(),
1812            args.limit,
1813            args.offset,
1814            args.include_forgotten,
1815        )
1816        .await
1817        .map_err(solo_to_mcp)?;
1818        let body = serde_json::to_string_pretty(&rows).unwrap_or_else(|_| String::new());
1819        Ok(CallToolResult::success(vec![Content::text(body)]))
1820    }
1821
1822    async fn handle_forget_document(
1823        &self,
1824        args: ForgetDocumentArgs,
1825    ) -> std::result::Result<CallToolResult, McpError> {
1826        let doc_id = DocumentId::from_str(&args.doc_id).map_err(|e| {
1827            McpError::invalid_params(format!("invalid doc_id: {e}"), None)
1828        })?;
1829        let report = self
1830            .inner
1831            .tenant
1832            .write()
1833            .forget_document_as(self.inner.audit_principal.clone(), doc_id)
1834            .await
1835            .map_err(solo_to_mcp)?;
1836        let body = serde_json::to_string_pretty(&report).unwrap_or_else(|_| String::new());
1837        Ok(CallToolResult::success(vec![Content::text(body)]))
1838    }
1839}
1840
1841#[cfg(test)]
1842mod dispatch_tests {
1843    //! In-process integration tests for the MCP tool surface. We invoke
1844    //! `SoloMcpServer::dispatch_tool` directly (bypasses the rmcp
1845    //! protocol framing + `RequestContext`, which requires a `Peer`
1846    //! that's not constructible outside rmcp internals). The server is
1847    //! constructed against a real WriterActor + ReaderPool +
1848    //! StubEmbedder + StubVectorIndex from `solo_storage::test_support`.
1849    //!
1850    //! Tests live inline in this module rather than `tests/` because an
1851    //! external integration-test exe in `target/debug/deps/mcp_dispatch-*`
1852    //! tripped Windows UAC ERROR_ELEVATION_REQUIRED on the dev machine.
1853    //! The lib test binary doesn't have that issue.
1854    use super::*;
1855    use serde_json::json;
1856    use solo_core::VectorIndex;
1857    use solo_storage::test_support::StubVectorIndex;
1858    use solo_storage::{
1859        EmbedderConfig, IdentityConfig, KeyMaterial, ReaderPool, SoloConfig,
1860        StubEmbedder, TenantHandle, TenantRegistry, WriterActor, WriterSpawn,
1861    };
1862    use std::sync::Arc as StdArc;
1863
1864    fn fake_config(dim: u32) -> SoloConfig {
1865        SoloConfig {
1866            schema_version: 1,
1867            salt_hex: "00000000000000000000000000000000".to_string(),
1868            embedder: EmbedderConfig {
1869                name: "stub".to_string(),
1870                version: "v1".to_string(),
1871                dim,
1872                dtype: "f32".to_string(),
1873            },
1874            identity: IdentityConfig::default(),
1875            documents: solo_storage::DocumentConfig::default(),
1876            auth: None,
1877            audit: solo_storage::AuditSettings::default(),
1878            redaction: solo_storage::RedactionConfig::default(),
1879            llm: None,
1880            triples: solo_storage::TriplesConfig::default(),
1881            sampling: solo_storage::SamplingConfig::default(),
1882        }
1883    }
1884
1885    struct Harness {
1886        server: SoloMcpServer,
1887        _tmp: tempfile::TempDir,
1888        write_handle_extra: Option<solo_storage::WriteHandle>,
1889        join: Option<std::thread::JoinHandle<()>>,
1890    }
1891
1892    impl Harness {
1893        fn new(runtime: &tokio::runtime::Runtime) -> Self {
1894            let tmp = tempfile::TempDir::new().unwrap();
1895            let dim = 16usize;
1896            let hnsw: StdArc<dyn VectorIndex + Send + Sync> = StdArc::new(StubVectorIndex::new(dim));
1897            let embedder: StdArc<dyn solo_core::Embedder> = StdArc::new(StubEmbedder::new("stub", "v1", dim));
1898
1899            let conn = solo_storage::test_support::open_test_db_at(&tmp.path().join("test.db"));
1900            let WriterSpawn { handle, join } = WriterActor::spawn(conn, hnsw.clone());
1901
1902            // ReaderPool's deadpool::Pool needs a live tokio runtime for
1903            // both build + drop; build inside block_on.
1904            let path = tmp.path().join("test.db");
1905            let pool: ReaderPool =
1906                runtime.block_on(async { ReaderPool::new(&path, None, hnsw.clone()).unwrap() });
1907
1908            let tenant_id = solo_core::TenantId::default_tenant();
1909            let tenant_handle = StdArc::new(
1910                TenantHandle::from_parts_for_tests(
1911                    tenant_id.clone(),
1912                    fake_config(dim as u32),
1913                    path.clone(),
1914                    tmp.path().to_path_buf(),
1915                    0, // embedder_id; tests using full embedder_id path build their own
1916                    hnsw,
1917                    embedder.clone(),
1918                    handle.clone(),
1919                    std::thread::spawn(|| {}),
1920                    pool,
1921                ),
1922            );
1923            let key = KeyMaterial::from_bytes_for_tests([0u8; 32]);
1924            let registry = StdArc::new(TenantRegistry::for_tests_with_single_tenant(
1925                tmp.path().to_path_buf(),
1926                key,
1927                embedder,
1928                tenant_handle.clone(),
1929            ));
1930            let server = SoloMcpServer::new_for_tenant(registry, tenant_handle, Vec::new());
1931            Harness {
1932                server,
1933                _tmp: tmp,
1934                write_handle_extra: Some(handle),
1935                join: Some(join),
1936            }
1937        }
1938
1939        fn shutdown(mut self, runtime: &tokio::runtime::Runtime) {
1940            // The whole shutdown runs inside block_on so deadpool-sqlite's
1941            // drop (which schedules cleanup on the active runtime) sees a
1942            // live reactor. Without this, dropping the SoloMcpServer
1943            // (which holds the ReaderPool through its Arc<Inner>) panics
1944            // with "no reactor running".
1945            let join = self.join.take();
1946            let extra = self.write_handle_extra.take();
1947            runtime.block_on(async move {
1948                drop(extra);
1949                drop(self.server);
1950                drop(self._tmp);
1951                if let Some(join) = join {
1952                    let (tx, rx) = std::sync::mpsc::channel();
1953                    std::thread::spawn(move || {
1954                        let _ = tx.send(join.join());
1955                    });
1956                    tokio::task::spawn_blocking(move || {
1957                        rx.recv_timeout(std::time::Duration::from_secs(5))
1958                    })
1959                    .await
1960                    .expect("blocking task")
1961                    .expect("writer thread did not exit within 5s")
1962                    .expect("writer thread panicked");
1963                }
1964            });
1965        }
1966    }
1967
1968    fn rt() -> tokio::runtime::Runtime {
1969        tokio::runtime::Builder::new_multi_thread()
1970            .worker_threads(2)
1971            .enable_all()
1972            .build()
1973            .unwrap()
1974    }
1975
1976    /// Pull the first Content::text body out of a CallToolResult. Use
1977    /// serde_json roundtrip as a robust extractor — `Content`'s public
1978    /// API doesn't directly expose the inner text without going through
1979    /// pattern-matching on RawContent.
1980    fn first_text(r: &rmcp::model::CallToolResult) -> String {
1981        let first = r.content.first().expect("at least one content item");
1982        let v = serde_json::to_value(first).expect("content serialises");
1983        v.get("text")
1984            .and_then(|t| t.as_str())
1985            .map(|s| s.to_string())
1986            .unwrap_or_else(|| format!("{v}"))
1987    }
1988
1989    #[test]
1990    fn tools_list_returns_fourteen_canonical_tools() {
1991        let runtime = rt();
1992        let h = Harness::new(&runtime);
1993        let tools = h.server.dispatch_list_tools();
1994        let names: Vec<&str> = tools.iter().map(|t| t.name.as_ref()).collect();
1995        assert_eq!(
1996            names,
1997            vec![
1998                "memory_remember",
1999                // v0.9.2 — batched-remember for agentic clients.
2000                "memory_remember_batch",
2001                "memory_recall",
2002                "memory_forget",
2003                "memory_inspect",
2004                // Derived-layer tools added in v0.4.0:
2005                "memory_themes",
2006                "memory_facts_about",
2007                "memory_contradictions",
2008                // Added in v0.5.0 (Priority 3):
2009                "memory_inspect_cluster",
2010                // Document tools added in v0.7.0:
2011                "memory_ingest_document",
2012                "memory_search_docs",
2013                "memory_inspect_document",
2014                "memory_list_documents",
2015                "memory_forget_document",
2016            ]
2017        );
2018        for t in &tools {
2019            // rmcp 1.x: Tool.description is Option<Cow<'static, str>>.
2020            let desc = t.description.as_deref().unwrap_or("");
2021            assert!(!desc.is_empty(), "{} description empty", t.name);
2022            let _schema = t.schema_as_json_value();
2023            // `required` is intentionally absent on memory_themes +
2024            // memory_contradictions + memory_list_documents (all args
2025            // optional with defaults). memory_facts_about has required
2026            // = ["subject"], etc. We don't assert per-tool 'required'
2027            // shape here; the schema's `properties` field is the more
2028            // important signal and is always present.
2029        }
2030        h.shutdown(&runtime);
2031    }
2032
2033    #[test]
2034    fn themes_returns_json_array_on_empty_db() {
2035        let runtime = rt();
2036        let h = Harness::new(&runtime);
2037        runtime.block_on(async {
2038            let r = h
2039                .server
2040                .dispatch_tool("memory_themes", json!({}), None)
2041                .await
2042                .expect("themes succeeds");
2043            let text = first_text(&r);
2044            // Empty derived layer → empty array JSON. Parses cleanly.
2045            let v: serde_json::Value =
2046                serde_json::from_str(&text).expect("parses as json");
2047            assert!(v.is_array(), "expected array, got: {text}");
2048            assert_eq!(v.as_array().unwrap().len(), 0);
2049        });
2050        h.shutdown(&runtime);
2051    }
2052
2053    #[test]
2054    fn themes_passes_through_window_and_limit_args() {
2055        let runtime = rt();
2056        let h = Harness::new(&runtime);
2057        runtime.block_on(async {
2058            // Should not crash with optional + integer args present.
2059            let r = h
2060                .server
2061                .dispatch_tool(
2062                    "memory_themes",
2063                    json!({ "window_days": 7, "limit": 20 }),
2064                    None,
2065                )
2066                .await
2067                .expect("themes with args succeeds");
2068            let text = first_text(&r);
2069            let v: serde_json::Value =
2070                serde_json::from_str(&text).expect("parses as json");
2071            assert!(v.is_array());
2072        });
2073        h.shutdown(&runtime);
2074    }
2075
2076    #[test]
2077    fn facts_about_rejects_empty_subject() {
2078        let runtime = rt();
2079        let h = Harness::new(&runtime);
2080        runtime.block_on(async {
2081            let err = h
2082                .server
2083                .dispatch_tool(
2084                    "memory_facts_about",
2085                    json!({ "subject": "   " }),
2086                    None,
2087                )
2088                .await
2089                .expect_err("empty subject must error");
2090            // McpError doesn't expose a clean kind/message accessor; just
2091            // verify the error fires (validation path reached).
2092            let s = format!("{err:?}");
2093            assert!(
2094                s.to_lowercase().contains("subject")
2095                    || s.to_lowercase().contains("invalid"),
2096                "got: {s}"
2097            );
2098        });
2099        h.shutdown(&runtime);
2100    }
2101
2102    #[test]
2103    fn facts_about_returns_array_for_unknown_subject() {
2104        let runtime = rt();
2105        let h = Harness::new(&runtime);
2106        runtime.block_on(async {
2107            let r = h
2108                .server
2109                .dispatch_tool(
2110                    "memory_facts_about",
2111                    json!({ "subject": "NobodyKnowsThisSubject" }),
2112                    None,
2113                )
2114                .await
2115                .expect("facts_about with unknown subject succeeds");
2116            let text = first_text(&r);
2117            let v: serde_json::Value =
2118                serde_json::from_str(&text).expect("parses as json");
2119            assert_eq!(v.as_array().unwrap().len(), 0);
2120        });
2121        h.shutdown(&runtime);
2122    }
2123
2124    #[test]
2125    fn facts_about_accepts_include_as_object_arg() {
2126        // Asserts the v0.5.1 P8 arg is parsed (serde default lets it
2127        // be omitted) and forwarded to the query lib without choking
2128        // the dispatcher. We don't seed triples — what we need to
2129        // verify is that the optional bool flows through. Both with
2130        // and without the arg, dispatch succeeds and returns an
2131        // empty array. (Functional coverage of the object-position
2132        // widening lives in the query-crate tests.)
2133        let runtime = rt();
2134        let h = Harness::new(&runtime);
2135        runtime.block_on(async {
2136            // With include_as_object=true.
2137            let r = h
2138                .server
2139                .dispatch_tool(
2140                    "memory_facts_about",
2141                    json!({ "subject": "Maya", "include_as_object": true }),
2142                    None,
2143                )
2144                .await
2145                .expect("dispatch with include_as_object=true succeeds");
2146            let v: serde_json::Value = serde_json::from_str(&first_text(&r))
2147                .expect("parses as json");
2148            assert_eq!(v.as_array().unwrap().len(), 0);
2149
2150            // Omitted entirely — must default to false (no error).
2151            let r = h
2152                .server
2153                .dispatch_tool(
2154                    "memory_facts_about",
2155                    json!({ "subject": "Maya" }),
2156                    None,
2157                )
2158                .await
2159                .expect("dispatch without include_as_object succeeds (default false)");
2160            let v: serde_json::Value = serde_json::from_str(&first_text(&r))
2161                .expect("parses as json");
2162            assert_eq!(v.as_array().unwrap().len(), 0);
2163        });
2164        h.shutdown(&runtime);
2165    }
2166
2167    #[test]
2168    fn contradictions_returns_json_array_on_empty_db() {
2169        let runtime = rt();
2170        let h = Harness::new(&runtime);
2171        runtime.block_on(async {
2172            let r = h
2173                .server
2174                .dispatch_tool("memory_contradictions", json!({}), None)
2175                .await
2176                .expect("contradictions succeeds");
2177            let text = first_text(&r);
2178            let v: serde_json::Value =
2179                serde_json::from_str(&text).expect("parses as json");
2180            assert!(v.is_array());
2181            assert_eq!(v.as_array().unwrap().len(), 0);
2182        });
2183        h.shutdown(&runtime);
2184    }
2185
2186    #[test]
2187    fn remember_then_recall_round_trip() {
2188        let runtime = rt();
2189        let h = Harness::new(&runtime);
2190        // Use &h.server directly (no clone) so the only outstanding
2191        // reference at shutdown time is the harness's own. The clone
2192        // path triggered a 5-second writer-thread timeout because the
2193        // local clone held an Arc<Inner> with its own WriteHandle past
2194        // h.shutdown().
2195        runtime.block_on(async {
2196            let r = h
2197                .server
2198                .dispatch_tool("memory_remember", json!({ "content": "the cat sat on the mat" }), None)
2199                .await
2200                .expect("remember succeeds");
2201            let text = first_text(&r);
2202            assert!(text.starts_with("remembered "), "got: {text}");
2203
2204            let r = h
2205                .server
2206                .dispatch_tool(
2207                    "memory_recall",
2208                    json!({ "query": "the cat sat on the mat", "limit": 5 }),
2209                    None,
2210                )
2211                .await
2212                .expect("recall succeeds");
2213            let text = first_text(&r);
2214            assert!(text.contains("the cat sat on the mat"), "got: {text}");
2215        });
2216        h.shutdown(&runtime);
2217    }
2218
2219    #[test]
2220    fn forget_excludes_row_from_subsequent_recall() {
2221        let runtime = rt();
2222        let h = Harness::new(&runtime);
2223
2224        runtime.block_on(async {
2225            let r = h
2226                .server
2227                .dispatch_tool("memory_remember", json!({ "content": "to be forgotten" }), None)
2228                .await
2229                .unwrap();
2230            let text = first_text(&r);
2231            let mid = text.strip_prefix("remembered ").unwrap().to_string();
2232
2233            h.server
2234                .dispatch_tool(
2235                    "memory_forget",
2236                    json!({ "memory_id": mid, "reason": "test" }),
2237                    None,
2238                )
2239                .await
2240                .expect("forget succeeds");
2241
2242            let r = h
2243                .server
2244                .dispatch_tool(
2245                    "memory_recall",
2246                    json!({ "query": "to be forgotten", "limit": 5 }),
2247                    None,
2248                )
2249                .await
2250                .unwrap();
2251            let text = first_text(&r);
2252            assert!(
2253                !text.contains(r#""content": "to be forgotten""#),
2254                "forgotten row should be excluded; got: {text}"
2255            );
2256        });
2257        h.shutdown(&runtime);
2258    }
2259
2260    #[test]
2261    fn empty_remember_returns_invalid_params() {
2262        let runtime = rt();
2263        let h = Harness::new(&runtime);
2264        runtime.block_on(async {
2265            let err = h
2266                .server
2267                .dispatch_tool("memory_remember", json!({ "content": "" }), None)
2268                .await
2269                .unwrap_err();
2270            assert!(format!("{err:?}").contains("must not be empty"));
2271        });
2272        h.shutdown(&runtime);
2273    }
2274
2275    #[test]
2276    fn empty_recall_query_returns_invalid_params() {
2277        let runtime = rt();
2278        let h = Harness::new(&runtime);
2279        runtime.block_on(async {
2280            let err = h
2281                .server
2282                .dispatch_tool("memory_recall", json!({ "query": "   " }), None)
2283                .await
2284                .unwrap_err();
2285            assert!(format!("{err:?}").contains("must not be empty"));
2286        });
2287        h.shutdown(&runtime);
2288    }
2289
2290    #[test]
2291    fn inspect_with_invalid_id_returns_invalid_params() {
2292        let runtime = rt();
2293        let h = Harness::new(&runtime);
2294        runtime.block_on(async {
2295            let err = h
2296                .server
2297                .dispatch_tool("memory_inspect", json!({ "memory_id": "not-a-uuid" }), None)
2298                .await
2299                .unwrap_err();
2300            assert!(format!("{err:?}").contains("invalid memory_id"));
2301        });
2302        h.shutdown(&runtime);
2303    }
2304
2305    #[test]
2306    fn forget_unknown_id_returns_invalid_params() {
2307        let runtime = rt();
2308        let h = Harness::new(&runtime);
2309        runtime.block_on(async {
2310            // Valid UUID format but not in episodes — handle_forget
2311            // surfaces NotFound, mapped to invalid_params per
2312            // solo_to_mcp.
2313            let err = h
2314                .server
2315                .dispatch_tool(
2316                    "memory_forget",
2317                    json!({ "memory_id": "00000000-0000-7000-8000-000000000000" }),
2318                    None,
2319                )
2320                .await
2321                .unwrap_err();
2322            assert!(format!("{err:?}").contains("not found"));
2323        });
2324        h.shutdown(&runtime);
2325    }
2326
2327    #[test]
2328    fn unknown_tool_name_returns_invalid_params() {
2329        let runtime = rt();
2330        let h = Harness::new(&runtime);
2331        runtime.block_on(async {
2332            let err = h
2333                .server
2334                .dispatch_tool("memory.summon", json!({}), None)
2335                .await
2336                .unwrap_err();
2337            assert!(format!("{err:?}").contains("unknown tool"));
2338        });
2339        h.shutdown(&runtime);
2340    }
2341
2342    /// Regression guard for v0.4.1's MCP tool name fix, generalised
2343    /// in v0.5.0 Priority 4 to cover **all three** major LLM
2344    /// providers, not just Anthropic.
2345    ///
2346    /// Each provider enforces its own tool-name regex on the
2347    /// function-calling wire. A tool name has to satisfy ALL of them
2348    /// to be portable across clients:
2349    ///
2350    ///   - **Anthropic**: `^[a-zA-Z0-9_-]{1,64}$` (what shipped in
2351    ///     v0.4.1; failing this rejects the entire toolset on Claude
2352    ///     Desktop / Cursor / Claude Code with
2353    ///     `FrontendRemoteMcpToolDefinition.name: String should
2354    ///     match pattern ...`).
2355    ///   - **OpenAI** function-calling: `^[a-zA-Z_][a-zA-Z0-9_-]*$`
2356    ///     with length ≤ 64 (must start with letter or underscore).
2357    ///   - **Gemini** function-calling: documented as a-z, A-Z, 0-9,
2358    ///     underscores and dashes; some sources also allow dots. We
2359    ///     use the conservative intersection — must start with
2360    ///     letter or underscore, alphanumeric + underscore only (no
2361    ///     hyphen, no dot), length ≤ 63. This is the strictest of
2362    ///     the three patterns, so any tool that passes it also
2363    ///     passes the other two. Sources differ on whether Gemini
2364    ///     accepts dots or hyphens; the strictest reading guards us
2365    ///     against the future where one provider tightens the regex
2366    ///     (which is the failure mode v0.4.1 hit on Anthropic). See
2367    ///     <https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/docs/api/google/generativeai/protos/FunctionDeclaration.md>
2368    ///     and <https://ai.google.dev/gemini-api/docs/function-calling>.
2369    ///
2370    /// Lesson banked v0.3 #8: rmcp framing tests pass dot-named
2371    /// tools fine because rmcp's own client-side validation is
2372    /// permissive. Only the downstream provider API enforces the
2373    /// regex. This test gates the names at `cargo test` time so any
2374    /// future tool-name change has to pass all three provider
2375    /// regexes before reaching real clients.
2376    #[test]
2377    fn tool_names_match_cross_provider_regex() {
2378        /// Anthropic API name regex: `^[a-zA-Z0-9_-]{1,64}$`.
2379        fn passes_anthropic(name: &str) -> bool {
2380            let len = name.len();
2381            if !(1..=64).contains(&len) {
2382                return false;
2383            }
2384            name.chars()
2385                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
2386        }
2387
2388        /// OpenAI function-calling name regex:
2389        /// `^[a-zA-Z_][a-zA-Z0-9_-]*$`, length ≤ 64.
2390        fn passes_openai(name: &str) -> bool {
2391            let len = name.len();
2392            if !(1..=64).contains(&len) {
2393                return false;
2394            }
2395            let mut chars = name.chars();
2396            let first = match chars.next() {
2397                Some(c) => c,
2398                None => return false,
2399            };
2400            if !(first.is_ascii_alphabetic() || first == '_') {
2401                return false;
2402            }
2403            chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
2404        }
2405
2406        /// Gemini function-calling name regex (conservative
2407        /// reading): `^[a-zA-Z_][a-zA-Z0-9_]*$`, length ≤ 63. No
2408        /// hyphen, no dot — strictest of the three so any name that
2409        /// passes this passes the other two.
2410        fn passes_gemini(name: &str) -> bool {
2411            let len = name.len();
2412            if !(1..=63).contains(&len) {
2413                return false;
2414            }
2415            let mut chars = name.chars();
2416            let first = match chars.next() {
2417                Some(c) => c,
2418                None => return false,
2419            };
2420            if !(first.is_ascii_alphabetic() || first == '_') {
2421                return false;
2422            }
2423            chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
2424        }
2425
2426        let tools = build_tools();
2427        assert_eq!(
2428            tools.len(),
2429            14,
2430            "expected 14 tools in v0.9.2 (8 v0.5.x + 5 document tools + remember_batch)"
2431        );
2432        // Sanity-check that tool_names() agrees with build_tools().
2433        let tool_name_strings: Vec<String> =
2434            tools.iter().map(|t| t.name.to_string()).collect();
2435        let public_names: Vec<String> =
2436            super::tool_names().iter().map(|s| s.to_string()).collect();
2437        assert_eq!(
2438            tool_name_strings, public_names,
2439            "tool_names() drifted from build_tools() — keep them in sync"
2440        );
2441
2442        for t in tools {
2443            assert!(
2444                passes_anthropic(&t.name),
2445                "tool name {:?} fails Anthropic regex \
2446                 ^[a-zA-Z0-9_-]{{1,64}}$ — see v0.3 lesson #8",
2447                t.name
2448            );
2449            assert!(
2450                passes_openai(&t.name),
2451                "tool name {:?} fails OpenAI function-calling regex \
2452                 ^[a-zA-Z_][a-zA-Z0-9_-]*$ (len ≤ 64)",
2453                t.name
2454            );
2455            assert!(
2456                passes_gemini(&t.name),
2457                "tool name {:?} fails Gemini function-calling regex \
2458                 ^[a-zA-Z_][a-zA-Z0-9_]*$ (len ≤ 63, strict)",
2459                t.name
2460            );
2461        }
2462    }
2463
2464    /// Regression guard for the v0.5.0 Priority 4 jargon pass.
2465    ///
2466    /// Tool descriptions and `get_info().instructions` are the first
2467    /// (and often only) thing a calling LLM reads when its
2468    /// tool-search mechanism decides whether Solo's tools are
2469    /// relevant. Earlier descriptions leaned on Solo-internal
2470    /// vocabulary (`SPO`, `Steward`, `LEFT JOIN`, `candidate pair`,
2471    /// `tagged_with`) which doesn't pattern-match natural-language
2472    /// agent queries like "what do you know about Alex?" — that's
2473    /// the load-bearing v0.5.0 finding from the 2026-05-14
2474    /// thesis-test in Claude Desktop.
2475    ///
2476    /// This test pins the de-jargoning by forbidding the old
2477    /// vocabulary from appearing in any user-facing text. Future
2478    /// contributors who reach for jargon trip the test and have to
2479    /// pick plain-English phrasing instead.
2480    #[test]
2481    fn tool_descriptions_avoid_internal_jargon() {
2482        // Case-insensitive substring match. Drawn from the
2483        // pre-Priority-4 descriptions; expand only if a new term
2484        // creeps in.
2485        const FORBIDDEN: &[&str] = &[
2486            "SPO",
2487            "Steward",
2488            "Steward-flagged",
2489            "LEFT JOIN",
2490            "candidate pair",
2491            "candidate_pair",
2492            "tagged_with",
2493        ];
2494
2495        fn contains_case_insensitive(haystack: &str, needle: &str) -> bool {
2496            haystack.to_lowercase().contains(&needle.to_lowercase())
2497        }
2498
2499        // 1. Each tool description.
2500        for t in build_tools() {
2501            let desc = t.description.as_deref().unwrap_or("");
2502            for term in FORBIDDEN {
2503                assert!(
2504                    !contains_case_insensitive(desc, term),
2505                    "tool {:?} description contains forbidden jargon \
2506                     {:?} — rewrite in plain English (see v0.5.0 \
2507                     Priority 4)",
2508                    t.name,
2509                    term,
2510                );
2511            }
2512        }
2513
2514        // 2. The server-level instructions (what tool-search sees
2515        // first).
2516        let server_info = harness_server_info();
2517        let instructions = server_info
2518            .instructions
2519            .as_deref()
2520            .expect("get_info() must set instructions");
2521        for term in FORBIDDEN {
2522            assert!(
2523                !contains_case_insensitive(instructions, term),
2524                "get_info().instructions contains forbidden jargon \
2525                 {:?} — rewrite in plain English",
2526                term,
2527            );
2528        }
2529    }
2530
2531    /// Build a `ServerInfo` for the jargon test without spinning up
2532    /// the full harness (which needs tokio + tempdir). The
2533    /// `ServerHandler::get_info()` method doesn't take `&self` state
2534    /// in any meaningful way for our impl — it returns a static
2535    /// `ServerInfo` literal — so we construct a minimal-input server
2536    /// just to call it.
2537    fn harness_server_info() -> rmcp::model::ServerInfo {
2538        let runtime = rt();
2539        let h = Harness::new(&runtime);
2540        let info = ServerHandler::get_info(&h.server);
2541        h.shutdown(&runtime);
2542        info
2543    }
2544
2545    /// Regression guard for the v0.9.0 → v0.9.1 P1 Fix 1 MCP
2546    /// `serverInfo` identity regression.
2547    ///
2548    /// In v0.9.0, P0a's rmcp 0.1.5 → 1.7 bump replaced the explicit
2549    /// `Implementation::new("solo", "<version>")` constructor with
2550    /// `Implementation::from_build_env()`. That helper reads
2551    /// `CARGO_PKG_NAME` + `CARGO_PKG_VERSION` from **rmcp's own** build
2552    /// environment (the proc-macro expansion captures rmcp's
2553    /// `Cargo.toml`, not the consumer's). Every Solo MCP daemon on
2554    /// v0.9.0 self-identified as `{name: "rmcp", version: "1.7.0"}`
2555    /// instead of `{name: "solo", version: "<workspace.version>"}`.
2556    ///
2557    /// Pins:
2558    ///   - `name == "solo"` (the operator-facing binary name, not
2559    ///     `"solo-api"` which would come from
2560    ///     `env!("CARGO_PKG_NAME")` against this crate's manifest);
2561    ///   - `version == env!("CARGO_PKG_VERSION")` from solo-api's own
2562    ///     compile environment (this is the workspace.package version
2563    ///     via inheritance, so it stays in sync with `solo --version`
2564    ///     and `solo-cli`'s identity).
2565    #[test]
2566    fn server_info_identity_is_solo_not_rmcp_or_solo_api() {
2567        let info = harness_server_info();
2568        let name = info.server_info.name.as_str();
2569        let version = info.server_info.version.as_str();
2570        assert_eq!(
2571            name, "solo",
2572            "MCP serverInfo.name must be \"solo\" (not \"rmcp\" or \
2573             \"solo-api\"). got name={name:?} version={version:?}"
2574        );
2575        assert_eq!(
2576            version,
2577            env!("CARGO_PKG_VERSION"),
2578            "MCP serverInfo.version must match solo-api's compile-time \
2579             CARGO_PKG_VERSION (i.e. the workspace.package version); \
2580             a mismatch means we regressed back to rmcp's build env. \
2581             got version={version:?}"
2582        );
2583    }
2584
2585    // ---- memory_inspect_cluster (v0.5.0 Priority 3) ----
2586
2587    #[test]
2588    fn inspect_cluster_unknown_id_returns_invalid_params() {
2589        // NotFound from solo_query::inspect_cluster is mapped through
2590        // `solo_to_mcp` to `invalid_params` (MCP has no separate
2591        // not-found error shape). Error message should name the id.
2592        let runtime = rt();
2593        let h = Harness::new(&runtime);
2594        runtime.block_on(async {
2595            let err = h
2596                .server
2597                .dispatch_tool(
2598                    "memory_inspect_cluster",
2599                    json!({ "cluster_id": "no-such-cluster" }),
2600                    None,
2601                )
2602                .await
2603                .expect_err("unknown cluster must error");
2604            let s = format!("{err:?}");
2605            assert!(
2606                s.contains("no-such-cluster") || s.to_lowercase().contains("not found"),
2607                "expected error to mention the missing cluster id; got: {s}"
2608            );
2609        });
2610        h.shutdown(&runtime);
2611    }
2612
2613    #[test]
2614    fn inspect_cluster_rejects_empty_id() {
2615        let runtime = rt();
2616        let h = Harness::new(&runtime);
2617        runtime.block_on(async {
2618            let err = h
2619                .server
2620                .dispatch_tool(
2621                    "memory_inspect_cluster",
2622                    json!({ "cluster_id": "   " }),
2623                    None,
2624                )
2625                .await
2626                .expect_err("blank cluster_id must error");
2627            let s = format!("{err:?}");
2628            assert!(
2629                s.to_lowercase().contains("cluster_id")
2630                    || s.to_lowercase().contains("must not be empty"),
2631                "got: {s}"
2632            );
2633        });
2634        h.shutdown(&runtime);
2635    }
2636
2637    // ---- Document tools (v0.7.0 P5) ----
2638    //
2639    // The five document handlers each have two arg-shape tests:
2640    //   - arg-struct parses from JSON (serde round-trip; defaults work).
2641    //   - dispatch arm routes to the handler (we observe behaviour via
2642    //     a known empty-DB response — bad routing surfaces as
2643    //     "unknown tool" or wrong shape).
2644    //
2645    // Functional coverage (ingest → search → inspect → forget) lives in
2646    // `crates/solo-cli/tests/mcp_smoke.rs` where a real subprocess + real
2647    // writer-with-embedder is wired up. The in-process Harness here uses
2648    // `WriterActor::spawn` which doesn't carry an embedder, so ingest /
2649    // search themselves return an error — but the dispatch + arg-parse
2650    // paths exercise correctly.
2651
2652    #[test]
2653    fn ingest_document_args_parse_with_required_path() {
2654        let v: IngestDocumentArgs =
2655            serde_json::from_value(json!({ "path": "/tmp/notes.md" })).expect("parses");
2656        assert_eq!(v.path, "/tmp/notes.md");
2657        // path is required — missing must reject at deserialization.
2658        let err = serde_json::from_value::<IngestDocumentArgs>(json!({})).unwrap_err();
2659        assert!(format!("{err}").contains("path"));
2660    }
2661
2662    #[test]
2663    fn search_docs_args_parse_with_default_limit() {
2664        let v: SearchDocsArgs =
2665            serde_json::from_value(json!({ "query": "backups" })).expect("parses");
2666        assert_eq!(v.query, "backups");
2667        assert_eq!(v.limit, 5, "default limit must be 5");
2668        let v: SearchDocsArgs =
2669            serde_json::from_value(json!({ "query": "backups", "limit": 20 })).expect("parses");
2670        assert_eq!(v.limit, 20);
2671    }
2672
2673    #[test]
2674    fn inspect_document_args_parse_with_required_doc_id() {
2675        let v: InspectDocumentArgs =
2676            serde_json::from_value(json!({ "doc_id": "abc" })).expect("parses");
2677        assert_eq!(v.doc_id, "abc");
2678        let err = serde_json::from_value::<InspectDocumentArgs>(json!({})).unwrap_err();
2679        assert!(format!("{err}").contains("doc_id"));
2680    }
2681
2682    #[test]
2683    fn list_documents_args_parse_with_all_defaults() {
2684        let v: ListDocumentsArgs = serde_json::from_value(json!({})).expect("parses");
2685        assert_eq!(v.limit, 20, "default limit must be 20");
2686        assert_eq!(v.offset, 0, "default offset must be 0");
2687        assert!(!v.include_forgotten, "default include_forgotten must be false");
2688        let v: ListDocumentsArgs = serde_json::from_value(
2689            json!({ "limit": 5, "offset": 10, "include_forgotten": true }),
2690        )
2691        .expect("parses");
2692        assert_eq!(v.limit, 5);
2693        assert_eq!(v.offset, 10);
2694        assert!(v.include_forgotten);
2695    }
2696
2697    #[test]
2698    fn forget_document_args_parse_with_required_doc_id() {
2699        let v: ForgetDocumentArgs =
2700            serde_json::from_value(json!({ "doc_id": "abc" })).expect("parses");
2701        assert_eq!(v.doc_id, "abc");
2702        let err = serde_json::from_value::<ForgetDocumentArgs>(json!({})).unwrap_err();
2703        assert!(format!("{err}").contains("doc_id"));
2704    }
2705
2706    #[test]
2707    fn ingest_document_rejects_empty_path() {
2708        // Reaches the dispatch arm → handle_ingest_document → empty
2709        // guard fires before the writer is touched. Proves routing.
2710        let runtime = rt();
2711        let h = Harness::new(&runtime);
2712        runtime.block_on(async {
2713            let err = h
2714                .server
2715                .dispatch_tool("memory_ingest_document", json!({ "path": "" }), None)
2716                .await
2717                .expect_err("empty path must error");
2718            let s = format!("{err:?}");
2719            assert!(
2720                s.to_lowercase().contains("path")
2721                    || s.to_lowercase().contains("must not be empty"),
2722                "got: {s}"
2723            );
2724        });
2725        h.shutdown(&runtime);
2726    }
2727
2728    #[test]
2729    fn search_docs_rejects_empty_query() {
2730        // Empty query trips solo_query::run_doc_search's validation
2731        // → InvalidInput → invalid_params.
2732        let runtime = rt();
2733        let h = Harness::new(&runtime);
2734        runtime.block_on(async {
2735            let err = h
2736                .server
2737                .dispatch_tool("memory_search_docs", json!({ "query": "   " }), None)
2738                .await
2739                .expect_err("empty query must error");
2740            let s = format!("{err:?}");
2741            assert!(
2742                s.to_lowercase().contains("must not be empty")
2743                    || s.to_lowercase().contains("invalid"),
2744                "got: {s}"
2745            );
2746        });
2747        h.shutdown(&runtime);
2748    }
2749
2750    #[test]
2751    fn inspect_document_unknown_id_returns_invalid_params() {
2752        // Valid UUID format but no row exists → handler returns
2753        // invalid_params with the missing id in the message.
2754        let runtime = rt();
2755        let h = Harness::new(&runtime);
2756        runtime.block_on(async {
2757            let err = h
2758                .server
2759                .dispatch_tool(
2760                    "memory_inspect_document",
2761                    json!({ "doc_id": "00000000-0000-7000-8000-000000000000" }),
2762                    None,
2763                )
2764                .await
2765                .expect_err("unknown doc must error");
2766            let s = format!("{err:?}");
2767            assert!(
2768                s.to_lowercase().contains("not found"),
2769                "expected 'not found' message; got: {s}"
2770            );
2771        });
2772        h.shutdown(&runtime);
2773    }
2774
2775    #[test]
2776    fn inspect_document_rejects_malformed_id() {
2777        let runtime = rt();
2778        let h = Harness::new(&runtime);
2779        runtime.block_on(async {
2780            let err = h
2781                .server
2782                .dispatch_tool(
2783                    "memory_inspect_document",
2784                    json!({ "doc_id": "not-a-uuid" }),
2785                    None,
2786                )
2787                .await
2788                .expect_err("malformed doc_id must error");
2789            let s = format!("{err:?}");
2790            assert!(s.contains("invalid doc_id"), "got: {s}");
2791        });
2792        h.shutdown(&runtime);
2793    }
2794
2795    #[test]
2796    fn list_documents_returns_empty_array_on_empty_db() {
2797        let runtime = rt();
2798        let h = Harness::new(&runtime);
2799        runtime.block_on(async {
2800            let r = h
2801                .server
2802                .dispatch_tool("memory_list_documents", json!({}), None)
2803                .await
2804                .expect("list succeeds");
2805            let text = first_text(&r);
2806            let v: serde_json::Value =
2807                serde_json::from_str(&text).expect("parses as json");
2808            assert!(v.is_array(), "expected array, got: {text}");
2809            assert_eq!(v.as_array().unwrap().len(), 0);
2810        });
2811        h.shutdown(&runtime);
2812    }
2813
2814    #[test]
2815    fn list_documents_passes_through_limit_offset_include_args() {
2816        let runtime = rt();
2817        let h = Harness::new(&runtime);
2818        runtime.block_on(async {
2819            let r = h
2820                .server
2821                .dispatch_tool(
2822                    "memory_list_documents",
2823                    json!({ "limit": 5, "offset": 10, "include_forgotten": true }),
2824                    None,
2825                )
2826                .await
2827                .expect("list with args succeeds");
2828            let text = first_text(&r);
2829            let v: serde_json::Value =
2830                serde_json::from_str(&text).expect("parses as json");
2831            assert!(v.is_array());
2832        });
2833        h.shutdown(&runtime);
2834    }
2835
2836    #[test]
2837    fn forget_document_rejects_malformed_id() {
2838        let runtime = rt();
2839        let h = Harness::new(&runtime);
2840        runtime.block_on(async {
2841            let err = h
2842                .server
2843                .dispatch_tool(
2844                    "memory_forget_document",
2845                    json!({ "doc_id": "not-a-uuid" }),
2846                    None,
2847                )
2848                .await
2849                .expect_err("malformed doc_id must error");
2850            let s = format!("{err:?}");
2851            assert!(s.contains("invalid doc_id"), "got: {s}");
2852        });
2853        h.shutdown(&runtime);
2854    }
2855
2856    // -----------------------------------------------------------------
2857    // v0.9.2 — `memory_remember_batch` + `salience` MCP layer tests.
2858    // -----------------------------------------------------------------
2859
2860    /// salience round-trip through `memory_remember`: an explicit
2861    /// in-range value reaches the writer; an absent value defaults
2862    /// to 0.5; an out-of-range value is rejected with invalid_params.
2863    #[test]
2864    fn remember_with_explicit_salience_round_trips() {
2865        let runtime = rt();
2866        let h = Harness::new(&runtime);
2867        runtime.block_on(async {
2868            let r = h
2869                .server
2870                .dispatch_tool(
2871                    "memory_remember",
2872                    json!({ "content": "with salience", "salience": 0.83 }),
2873                    None,
2874                )
2875                .await
2876                .expect("remember w/ salience succeeds");
2877            let text = first_text(&r);
2878            // Confirmation includes the new MemoryId.
2879            assert!(text.starts_with("remembered "), "got: {text}");
2880        });
2881        h.shutdown(&runtime);
2882    }
2883
2884    #[test]
2885    fn remember_with_out_of_range_salience_returns_invalid_params() {
2886        let runtime = rt();
2887        let h = Harness::new(&runtime);
2888        runtime.block_on(async {
2889            let err = h
2890                .server
2891                .dispatch_tool(
2892                    "memory_remember",
2893                    json!({ "content": "out of range", "salience": 1.5 }),
2894                    None,
2895                )
2896                .await
2897                .unwrap_err();
2898            let s = format!("{err:?}");
2899            assert!(s.contains("salience must be"), "got: {s}");
2900        });
2901        h.shutdown(&runtime);
2902    }
2903
2904    /// Salience boundary: 0.0 and 1.0 are both valid (inclusive range).
2905    #[test]
2906    fn remember_with_boundary_salience_succeeds() {
2907        let runtime = rt();
2908        let h = Harness::new(&runtime);
2909        runtime.block_on(async {
2910            for s in [0.0_f64, 1.0_f64] {
2911                let r = h
2912                    .server
2913                    .dispatch_tool(
2914                        "memory_remember",
2915                        json!({ "content": format!("boundary-{s}"), "salience": s }),
2916                        None,
2917                    )
2918                    .await
2919                    .expect("boundary salience succeeds");
2920                assert!(first_text(&r).starts_with("remembered "));
2921            }
2922        });
2923        h.shutdown(&runtime);
2924    }
2925
2926    /// Happy-path batch: 3 items go in, 3 memory_ids come out in order.
2927    #[test]
2928    fn remember_batch_returns_ids_in_order() {
2929        let runtime = rt();
2930        let h = Harness::new(&runtime);
2931        runtime.block_on(async {
2932            let items = json!([
2933                { "content": "batch-a" },
2934                { "content": "batch-b", "source_type": "user_preference", "salience": 0.9 },
2935                { "content": "batch-c", "salience": 0.1 },
2936            ]);
2937            let r = h
2938                .server
2939                .dispatch_tool(
2940                    "memory_remember_batch",
2941                    json!({ "items": items }),
2942                    None,
2943                )
2944                .await
2945                .expect("batch succeeds");
2946            let text = first_text(&r);
2947            let parsed: serde_json::Value =
2948                serde_json::from_str(&text).expect("reply is JSON");
2949            let arr = parsed.as_array().expect("reply is array");
2950            assert_eq!(arr.len(), 3, "3 items in → 3 ids out: {text}");
2951            // Each entry must be a UUID-shaped string.
2952            for v in arr {
2953                let s = v.as_str().unwrap_or_else(|| panic!("non-string id: {v}"));
2954                assert_eq!(s.len(), 36, "UUID-shaped id expected: {s}");
2955            }
2956            // Distinct ids.
2957            let mut ids: Vec<&str> = arr.iter().map(|v| v.as_str().unwrap()).collect();
2958            ids.sort();
2959            ids.dedup();
2960            assert_eq!(ids.len(), 3, "ids must be distinct: {text}");
2961        });
2962        h.shutdown(&runtime);
2963    }
2964
2965    /// Empty items → invalid_params before any embedding work.
2966    #[test]
2967    fn remember_batch_empty_items_returns_invalid_params() {
2968        let runtime = rt();
2969        let h = Harness::new(&runtime);
2970        runtime.block_on(async {
2971            let err = h
2972                .server
2973                .dispatch_tool(
2974                    "memory_remember_batch",
2975                    json!({ "items": [] }),
2976                    None,
2977                )
2978                .await
2979                .unwrap_err();
2980            let s = format!("{err:?}");
2981            assert!(s.contains("must not be empty"), "got: {s}");
2982        });
2983        h.shutdown(&runtime);
2984    }
2985
2986    /// Per-item validation: empty content trips invalid_params with the
2987    /// index of the offending item baked into the message.
2988    #[test]
2989    fn remember_batch_rejects_per_item_empty_content() {
2990        let runtime = rt();
2991        let h = Harness::new(&runtime);
2992        runtime.block_on(async {
2993            let items = json!([
2994                { "content": "ok-1" },
2995                { "content": "   " },
2996                { "content": "ok-3" },
2997            ]);
2998            let err = h
2999                .server
3000                .dispatch_tool(
3001                    "memory_remember_batch",
3002                    json!({ "items": items }),
3003                    None,
3004                )
3005                .await
3006                .unwrap_err();
3007            let s = format!("{err:?}");
3008            assert!(s.contains("items[1]"), "must mention items[1]: {s}");
3009            assert!(s.contains("must not be empty"), "got: {s}");
3010        });
3011        h.shutdown(&runtime);
3012    }
3013
3014    /// Per-item validation: out-of-range salience trips invalid_params
3015    /// with the item index in the message.
3016    #[test]
3017    fn remember_batch_rejects_per_item_salience_out_of_range() {
3018        let runtime = rt();
3019        let h = Harness::new(&runtime);
3020        runtime.block_on(async {
3021            let items = json!([
3022                { "content": "ok-1", "salience": 0.5 },
3023                { "content": "out-of-range", "salience": -0.1 },
3024            ]);
3025            let err = h
3026                .server
3027                .dispatch_tool(
3028                    "memory_remember_batch",
3029                    json!({ "items": items }),
3030                    None,
3031                )
3032                .await
3033                .unwrap_err();
3034            let s = format!("{err:?}");
3035            assert!(s.contains("items[1]"), "must mention items[1]: {s}");
3036            assert!(s.contains("salience must be"), "got: {s}");
3037        });
3038        h.shutdown(&runtime);
3039    }
3040
3041    /// Over-cap batch is rejected at the MCP layer so we never embed
3042    /// 201+ items. Pinned at the same constant as the writer-actor.
3043    #[test]
3044    fn remember_batch_over_cap_returns_invalid_params() {
3045        let runtime = rt();
3046        let h = Harness::new(&runtime);
3047        runtime.block_on(async {
3048            let items: Vec<serde_json::Value> =
3049                (0..(solo_storage::MAX_REMEMBER_BATCH_SIZE + 1))
3050                    .map(|i| json!({ "content": format!("over-{i}") }))
3051                    .collect();
3052            let err = h
3053                .server
3054                .dispatch_tool(
3055                    "memory_remember_batch",
3056                    json!({ "items": items }),
3057                    None,
3058                )
3059                .await
3060                .unwrap_err();
3061            let s = format!("{err:?}");
3062            assert!(
3063                s.contains("MAX_REMEMBER_BATCH_SIZE"),
3064                "must mention the cap: {s}"
3065            );
3066        });
3067        h.shutdown(&runtime);
3068    }
3069
3070    // -----------------------------------------------------------------
3071    // v0.11.0 P3: per-tool progress event tests.
3072    //
3073    // These tests invoke `dispatch_tool` with a real
3074    // `ProgressReporter` wired to a fresh `SessionState`, then drain
3075    // the session's broadcast receiver to observe the emitted events.
3076    // The pattern mirrors `mcp_progress::tests::progress_reporter_*`
3077    // but exercises the full handler call stack (including the writer
3078    // and query pipelines) end-to-end.
3079    // -----------------------------------------------------------------
3080
3081    use crate::mcp_progress::{ProgressReporter, ProgressToken};
3082    use crate::mcp_session::SessionState;
3083    use std::sync::Arc as StdArc2;
3084
3085    fn fresh_progress_session() -> StdArc2<SessionState> {
3086        StdArc2::new(SessionState::new(solo_core::TenantId::default_tenant(), None))
3087    }
3088
3089    fn drain_progress_events(
3090        rx: &mut tokio::sync::broadcast::Receiver<crate::mcp_session::McpStreamEvent>,
3091    ) -> Vec<crate::mcp_session::McpStreamEvent> {
3092        let mut out = Vec::new();
3093        while let Ok(ev) = rx.try_recv() {
3094            out.push(ev);
3095        }
3096        out
3097    }
3098
3099    // v0.11.0 P3 note: `ingest_document_emits_progress_at_*` test lives
3100    // in `http::handler_tests` because the dispatch_tests harness uses
3101    // `WriterActor::spawn` (no embedder), so an end-to-end ingest panics
3102    // with "writer has no embedder". The handler_tests harness uses
3103    // `WriterActor::spawn_full` which carries an embedder; we exercise
3104    // the ingest progress checkpoints there.
3105
3106    /// v0.11.0 P3: `memory_search_docs` emits 3 progress events when
3107    /// `top_k` exceeds the threshold (100).
3108    #[test]
3109    fn search_docs_emits_progress_only_when_top_k_above_100() {
3110        let runtime = rt();
3111        let h = Harness::new(&runtime);
3112        runtime.block_on(async {
3113            let session = fresh_progress_session();
3114            let mut rx = session.subscribe_events();
3115            let reporter = ProgressReporter::new(session.clone(), ProgressToken(json!(42)));
3116            let _r = h
3117                .server
3118                .dispatch_tool(
3119                    "memory_search_docs",
3120                    json!({ "query": "anything", "limit": 150 }),
3121                    Some(reporter),
3122                )
3123                .await
3124                .expect("search succeeds");
3125            let events = drain_progress_events(&mut rx);
3126            assert_eq!(
3127                events.len(),
3128                3,
3129                "expected 3 search progress events at top_k=150, got {}",
3130                events.len()
3131            );
3132            // Spec shape: every event uses progressToken (echoed as
3133            // number 42) and walks progress 1..=3.
3134            for (i, ev) in events.iter().enumerate() {
3135                let params = &ev.data["params"];
3136                assert_eq!(params["progressToken"], json!(42));
3137                assert_eq!(params["total"], json!(3));
3138                assert_eq!(params["progress"], json!((i + 1) as u64));
3139            }
3140        });
3141        h.shutdown(&runtime);
3142    }
3143
3144    /// v0.11.0 P3: `memory_search_docs` with `top_k <= 100` does NOT
3145    /// emit progress events even when a reporter is wired. Threshold
3146    /// gating per Decision C.
3147    #[test]
3148    fn search_docs_emits_no_progress_when_top_k_below_threshold() {
3149        let runtime = rt();
3150        let h = Harness::new(&runtime);
3151        runtime.block_on(async {
3152            let session = fresh_progress_session();
3153            let mut rx = session.subscribe_events();
3154            let reporter = ProgressReporter::new(session.clone(), ProgressToken(json!("t")));
3155            let _r = h
3156                .server
3157                .dispatch_tool(
3158                    "memory_search_docs",
3159                    json!({ "query": "anything", "limit": 50 }),
3160                    Some(reporter),
3161                )
3162                .await
3163                .expect("search succeeds");
3164            let events = drain_progress_events(&mut rx);
3165            assert!(
3166                events.is_empty(),
3167                "expected no progress events at top_k=50, got {events:?}"
3168            );
3169        });
3170        h.shutdown(&runtime);
3171    }
3172
3173    /// v0.11.0 P3: `memory_remember_batch` with > 50 items emits
3174    /// per-25-items embed progress + a final "embedded" + "inserted"
3175    /// event. A 51-item batch fires at items 25, 50, then embedded
3176    /// (51/51), then inserted (51/51) = 4 events.
3177    #[test]
3178    fn remember_batch_emits_progress_only_when_size_above_50() {
3179        let runtime = rt();
3180        let h = Harness::new(&runtime);
3181        runtime.block_on(async {
3182            let session = fresh_progress_session();
3183            let mut rx = session.subscribe_events();
3184            let reporter = ProgressReporter::new(session.clone(), ProgressToken(json!("batch")));
3185            let items: Vec<serde_json::Value> = (0..51)
3186                .map(|i| json!({ "content": format!("item-{i}") }))
3187                .collect();
3188            let _r = h
3189                .server
3190                .dispatch_tool(
3191                    "memory_remember_batch",
3192                    json!({ "items": items }),
3193                    Some(reporter),
3194                )
3195                .await
3196                .expect("batch succeeds");
3197            let events = drain_progress_events(&mut rx);
3198            assert_eq!(
3199                events.len(),
3200                4,
3201                "expected 4 batch progress events for 51 items, got {}: {events:?}",
3202                events.len()
3203            );
3204            // First event = 25/51 "embedding"; second = 50/51 "embedding";
3205            // third = 51/51 "embedded"; fourth = 51/51 "inserted".
3206            let progresses: Vec<u64> = events
3207                .iter()
3208                .map(|e| e.data["params"]["progress"].as_u64().unwrap_or(0))
3209                .collect();
3210            assert_eq!(progresses, vec![25, 50, 51, 51]);
3211            assert_eq!(
3212                events.last().unwrap().data["params"]["message"],
3213                json!("inserted")
3214            );
3215            for ev in &events {
3216                assert_eq!(ev.data["params"]["progressToken"], json!("batch"));
3217                assert_eq!(ev.data["params"]["total"], json!(51));
3218            }
3219        });
3220        h.shutdown(&runtime);
3221    }
3222
3223    /// v0.11.0 P3: small batches (<= 50) do NOT emit progress events
3224    /// even with a reporter wired. Wire-overhead gating per Decision C.
3225    #[test]
3226    fn remember_batch_emits_no_progress_when_size_below_threshold() {
3227        let runtime = rt();
3228        let h = Harness::new(&runtime);
3229        runtime.block_on(async {
3230            let session = fresh_progress_session();
3231            let mut rx = session.subscribe_events();
3232            let reporter = ProgressReporter::new(session.clone(), ProgressToken(json!("t")));
3233            // 5 items — well below the threshold.
3234            let items: Vec<serde_json::Value> = (0..5)
3235                .map(|i| json!({ "content": format!("small-{i}") }))
3236                .collect();
3237            let _r = h
3238                .server
3239                .dispatch_tool(
3240                    "memory_remember_batch",
3241                    json!({ "items": items }),
3242                    Some(reporter),
3243                )
3244                .await
3245                .expect("batch succeeds");
3246            let events = drain_progress_events(&mut rx);
3247            assert!(
3248                events.is_empty(),
3249                "expected no progress events for 5-item batch, got {events:?}"
3250            );
3251        });
3252        h.shutdown(&runtime);
3253    }
3254
3255    /// v0.11.0 P3: stdio-style calls (no session = no progress reporter)
3256    /// must not panic and must produce no events. This pins the
3257    /// backward-compat invariant the rmcp `call_tool` path relies on.
3258    /// Uses `memory_search_docs` (no embedder dependency in the
3259    /// dispatch_tests harness) — the equivalent ingest_document
3260    /// "no progress" guarantee is asserted in `http::handler_tests`
3261    /// via the same `None` path.
3262    #[test]
3263    fn stdio_transport_does_not_emit_progress_events() {
3264        let runtime = rt();
3265        let h = Harness::new(&runtime);
3266        runtime.block_on(async {
3267            // Construct a session purely for the rx end — the tool call
3268            // gets `None`, so the session must NOT receive anything.
3269            let session = fresh_progress_session();
3270            let mut rx = session.subscribe_events();
3271            let _r = h
3272                .server
3273                .dispatch_tool(
3274                    "memory_search_docs",
3275                    // Above the threshold so progress WOULD fire if a
3276                    // reporter were wired — but no reporter = no events.
3277                    json!({ "query": "anything", "limit": 200 }),
3278                    None, // stdio: no reporter
3279                )
3280                .await
3281                .expect("search succeeds without reporter");
3282            let events = drain_progress_events(&mut rx);
3283            assert!(
3284                events.is_empty(),
3285                "stdio path (no reporter) must not publish to ANY session: {events:?}"
3286            );
3287        });
3288        h.shutdown(&runtime);
3289    }
3290
3291    /// v0.11.0 P3: emitted event ids are monotonically increasing per
3292    /// session across multiple tool calls. Pinned to surface any
3293    /// regression in `SessionState::publish_event`'s id allocator.
3294    #[test]
3295    fn progress_event_id_monotonic_per_session() {
3296        let runtime = rt();
3297        let h = Harness::new(&runtime);
3298        runtime.block_on(async {
3299            let session = fresh_progress_session();
3300            let mut rx = session.subscribe_events();
3301            // Two calls in sequence with progress; observe interleaved
3302            // ids stay strictly increasing.
3303            let r1 = ProgressReporter::new(session.clone(), ProgressToken(json!("a")));
3304            let r2 = ProgressReporter::new(session.clone(), ProgressToken(json!("b")));
3305            let _ = h
3306                .server
3307                .dispatch_tool(
3308                    "memory_search_docs",
3309                    json!({ "query": "q1", "limit": 150 }),
3310                    Some(r1),
3311                )
3312                .await;
3313            let _ = h
3314                .server
3315                .dispatch_tool(
3316                    "memory_search_docs",
3317                    json!({ "query": "q2", "limit": 150 }),
3318                    Some(r2),
3319                )
3320                .await;
3321            let events = drain_progress_events(&mut rx);
3322            assert!(events.len() >= 6, "expected at least 6 events: {events:?}");
3323            let ids: Vec<u64> = events.iter().map(|e| e.id).collect();
3324            for w in ids.windows(2) {
3325                assert!(
3326                    w[0] < w[1],
3327                    "event ids must be strictly monotonic: {ids:?}"
3328                );
3329            }
3330        });
3331        h.shutdown(&runtime);
3332    }
3333}
3334
3335// ===========================================================================
3336// v0.8.1 P2: MCP audit principal extraction
3337// ===========================================================================
3338//
3339// These tests live in their own module because they manipulate the
3340// `SOLO_MCP_PRINCIPAL_TOKEN` env var, which is process-global mutable
3341// state. Serialised via a static `Mutex` so cargo test's multi-threaded
3342// runner doesn't race. Pattern mirrors the env-guard discipline in
3343// `solo_cli::commands::common::ollama_overrides_tests`.
3344
#[cfg(test)]
mod principal_extraction_tests {
    use super::*;
    use std::sync::Mutex;

    /// Process-wide lock taken by every test that touches
    /// `SOLO_MCP_PRINCIPAL_TOKEN`, so parallel test threads never see
    /// each other's env-var writes.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Acquires `ENV_LOCK`, recovering the guard from a poisoned mutex
    /// so one panicking test does not sink the rest of the suite.
    fn lock_env() -> std::sync::MutexGuard<'static, ()> {
        ENV_LOCK
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// Drop guard that removes the env var when it goes out of scope, so
    /// a panicking test cannot leak state into the next case.
    struct EnvGuard;

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            // SAFETY: every caller holds ENV_LOCK across construct + drop.
            unsafe { std::env::remove_var(ENV_MCP_PRINCIPAL_TOKEN) };
        }
    }

    /// Sets the principal env var to `val` and returns the cleanup guard.
    fn set_principal_env(val: &str) -> EnvGuard {
        // SAFETY: ENV_LOCK held by caller.
        unsafe { std::env::set_var(ENV_MCP_PRINCIPAL_TOKEN, val) };
        EnvGuard
    }

    /// Removes the principal env var and returns the cleanup guard.
    fn clear_principal_env() -> EnvGuard {
        // SAFETY: ENV_LOCK held by caller.
        unsafe { std::env::remove_var(ENV_MCP_PRINCIPAL_TOKEN) };
        EnvGuard
    }

    /// Stdio path: with `SOLO_MCP_PRINCIPAL_TOKEN` set and no header,
    /// the env value is the resolved principal.
    #[test]
    fn stdio_env_var_resolves_to_principal() {
        let _lock = lock_env();
        let _env = set_principal_env("alice-token");
        let principal = resolve_mcp_principal(None);
        assert_eq!(principal.as_deref(), Some("alice-token"));
    }

    /// Stdio path: with the env var absent the result is `None`
    /// (regression guard for the v0.8.0 behaviour of users without
    /// auth).
    #[test]
    fn stdio_no_env_var_resolves_to_none() {
        let _lock = lock_env();
        let _env = clear_principal_env();
        let principal = resolve_mcp_principal(None);
        assert_eq!(principal, None);
    }

    /// Stdio path: a whitespace-only env var resolves to `None`, so a
    /// launcher typo does not pin every audit row to a blank principal.
    #[test]
    fn stdio_whitespace_env_var_resolves_to_none() {
        let _lock = lock_env();
        let _env = set_principal_env("   \t  ");
        let principal = resolve_mcp_principal(None);
        assert_eq!(principal, None);
    }

    /// HTTP-MCP path: an `Authorization: Bearer <token>` header value
    /// resolves to the token itself.
    #[test]
    fn http_header_resolves_to_bearer_token_principal() {
        let _lock = lock_env();
        let _env = clear_principal_env();
        let principal = resolve_mcp_principal(Some("Bearer api-token-xyz"));
        assert_eq!(principal.as_deref(), Some("api-token-xyz"));
    }

    /// Precedence: when the env var and the header both carry a token,
    /// the header wins (consistent with the rest of the auth stack —
    /// JWT claim beats `X-Solo-Tenant` header).
    #[test]
    fn http_header_beats_env_var() {
        let _lock = lock_env();
        let _env = set_principal_env("env-token");
        let principal = resolve_mcp_principal(Some("Bearer header-token"));
        assert_eq!(
            principal.as_deref(),
            Some("header-token"),
            "header MUST win over env var per documented precedence"
        );
    }

    /// HTTP-MCP path: a header without the `Bearer ` prefix is ignored
    /// and resolution falls through to the env var.
    #[test]
    fn http_malformed_header_falls_through_to_env() {
        let _lock = lock_env();
        let _env = set_principal_env("env-fallback");
        let principal = resolve_mcp_principal(Some("Basic dXNlcjpwYXNz"));
        assert_eq!(principal.as_deref(), Some("env-fallback"));
    }

    /// HTTP-MCP path: a bearer header with no token after the prefix
    /// falls through to the env var, in the same spirit as rejecting a
    /// whitespace-only env var — a half-formed header earns no credit.
    #[test]
    fn http_empty_bearer_header_falls_through_to_env() {
        let _lock = lock_env();
        let _env = set_principal_env("env-fallback");
        let principal = resolve_mcp_principal(Some("Bearer   "));
        assert_eq!(principal.as_deref(), Some("env-fallback"));
    }

    /// Five consecutive resolutions under one env-var setting all return
    /// the same principal (regression guard: an accidental thread-local
    /// cache would break the "stable across N tool calls in one session"
    /// contract the brief calls out).
    #[test]
    fn stable_across_multiple_resolutions() {
        let _lock = lock_env();
        let _env = set_principal_env("stable-token");
        for _attempt in 0..5 {
            let principal = resolve_mcp_principal(None);
            assert_eq!(principal.as_deref(), Some("stable-token"));
        }
    }
}
3470
/// v0.9.0 P2 tests for the LLM-config gate applied at MCP initialize
/// time.
///
/// These are pure-function tests of [`initialize_decision`]: they build
/// no rmcp Peer (its constructors are private) and drive no MCP
/// handshake. The wiring from `initialize_decision` to the side-effect
/// path lives in [`SoloMcpServer::initialize`]; it is covered
/// indirectly by the audit-row tests in
/// [`crate::llm::sampling::tests`], which exercise the same
/// `SamplingLlmClient` + `WriteCommand::EmitLlmSamplingAudit` path that
/// `populate_sampling_steward` constructs.
#[cfg(test)]
mod initialize_decision_tests {
    use super::*;
    use solo_storage::LlmSettings;

    /// Asserts that `settings` yields `InitializeDecision::Allow` both
    /// without and with the peer's sampling capability.
    fn assert_allowed_either_way(settings: &Option<LlmSettings>) {
        for peer_has_sampling in [false, true] {
            assert_eq!(
                initialize_decision(settings, peer_has_sampling),
                InitializeDecision::Allow
            );
        }
    }

    /// No `[llm]` block at all → always Allow (matches v0.8.x
    /// behaviour).
    #[test]
    fn no_llm_block_allows_initialize_regardless_of_sampling_capability() {
        assert_allowed_either_way(&None);
    }

    /// `[llm] mode = "none"` → always Allow.
    #[test]
    fn llm_none_allows_initialize_regardless_of_sampling_capability() {
        let settings = Some(LlmSettings::None);
        assert_allowed_either_way(&settings);
    }

    /// `[llm] mode = "anthropic"` → always Allow.
    #[test]
    fn llm_anthropic_allows_initialize_regardless_of_sampling_capability() {
        let settings = Some(LlmSettings::Anthropic {
            api_key_env: "ANTHROPIC_API_KEY".into(),
            model: "claude-sonnet-4-6".into(),
        });
        assert_allowed_either_way(&settings);
    }

    /// `[llm] mode = "ollama"` → always Allow.
    #[test]
    fn llm_ollama_allows_initialize_regardless_of_sampling_capability() {
        let settings = Some(LlmSettings::Ollama {
            base_url: "http://localhost:11434".into(),
            model: "qwen3-coder:30b".into(),
        });
        assert_allowed_either_way(&settings);
    }

    /// `[llm] mode = "mcp_sampling"` with a sampling-capable peer →
    /// populate the slot.
    #[test]
    fn llm_mcp_sampling_with_sampling_capability_populates_slot() {
        let settings = Some(LlmSettings::McpSampling);
        let decision = initialize_decision(&settings, true);
        assert_eq!(decision, InitializeDecision::PopulateSamplingSteward);
    }

    /// `[llm] mode = "mcp_sampling"` with a peer that LACKS the sampling
    /// capability → initialize is rejected with the locked BLOCKER 2
    /// error.
    #[test]
    fn llm_mcp_sampling_without_sampling_capability_rejects() {
        let settings = Some(LlmSettings::McpSampling);
        let decision = initialize_decision(&settings, false);
        assert_eq!(
            decision,
            InitializeDecision::RejectMissingSamplingCapability
        );
    }

    /// The locked BLOCKER 2 error body is byte-stable: a future
    /// audit-revision can grep for these fragments and confirm they
    /// still land.
    #[test]
    fn sampling_capability_missing_error_message_contains_all_alternatives() {
        let msg = sampling_capability_missing_error_message();
        // Banner, the four alternative blocks, then the footer pointer
        // at the release-prep doc — checked in that order.
        let required_fragments = [
            "LLM backend `mcp_sampling`",
            "mode = \"anthropic\"",
            "api_key_env = \"ANTHROPIC_API_KEY\"",
            "mode = \"openai\"",
            "api_key_env = \"OPENAI_API_KEY\"",
            "mode = \"ollama\"",
            "base_url = \"http://localhost:11434\"",
            "mode = \"none\"",
            "docs/releases/v0.9.0.md",
        ];
        for fragment in required_fragments {
            assert!(msg.contains(fragment));
        }
    }
}
3564
3565// fetch_recall_rows + RecallHit + RecallRow used to live here. Recall
3566// pipeline moved to solo_query::recall in commit (consolidate-recall);
3567// transports just call solo_query::run_recall and format the result.
solo_api/mcp.rs

solo_api/
mcp.rs