Skip to main content

solo_api/
mcp.rs

1// SPDX-License-Identifier: Apache-2.0
2
//! MCP (Model Context Protocol) server for Solo.
//!
//! Exposes thirteen tools to MCP clients (Claude Desktop, Cursor, etc.):
//!
//! Episode tools (v0.1+):
//!   - `memory_remember(content, source_type?, source_id?)` — store an
//!     episode. Returns the new MemoryId.
//!   - `memory_recall(query, limit?)` — vector search. Returns the top-K
//!     matches with content + tier + status.
//!   - `memory_forget(memory_id, reason?)` — soft-delete an episode.
//!   - `memory_inspect(memory_id)` — return the full episode record.
//!
//! Derived-layer tools (v0.4.0+):
//!   - `memory_themes(window_days?, limit?)` — list cluster themes.
//!   - `memory_facts_about(subject, ...)` — query the structured-fact
//!     knowledge graph (subject-predicate-object triples).
//!   - `memory_contradictions(limit?)` — disagreements flagged during
//!     consolidation.
//!
//! Derived-layer tools (v0.5.0+):
//!   - `memory_inspect_cluster(cluster_id, full_content?)` — drill
//!     into one cluster's abstraction + source episodes (truncated).
//!
//! Document tools (v0.7.0+):
//!   - `memory_ingest_document(path)` — read a file from disk, split it
//!     into chunks, embed each, and store under documents/document_chunks.
//!   - `memory_search_docs(query, limit?)` — vector search restricted to
//!     document chunks; returns chunk content + parent-doc context.
//!   - `memory_inspect_document(doc_id)` — show one document's metadata
//!     plus a previewed list of its chunks.
//!   - `memory_list_documents(limit?, offset?, include_forgotten?)` —
//!     paginate over ingested documents, newest first.
//!   - `memory_forget_document(doc_id)` — soft-delete a document; chunks
//!     stop appearing in `memory_search_docs` and tombstone in HNSW.
//!
//! ## Transport
//!
//! `serve_stdio` wires the server to stdin/stdout for use as a subprocess
//! ("`claude_desktop_config.json` or `~/.cursor/mcp.json` invokes
//! `solo mcp-stdio`"). The function awaits a graceful shutdown when stdin
//! closes (parent disconnects) — same lifecycle as `solo daemon`'s
//! Ctrl+C path.
//!
//! ## What's deferred
//!
//! - SSE/HTTP transports — `rmcp` ships them, but v0.1 ships stdio only.
//! - `prompts/` and `resources/` capabilities — not needed for the
//!   thirteen-tool surface; ServerHandler defaults return empty lists.
//! - Tool argument validation beyond JSON Schema typing — we trust rmcp
//!   to deserialize per the schema, then serde-deserialize into our
//!   typed param structs. Bad inputs surface as clear errors.
54
55use std::sync::Arc;
56
57use rmcp::handler::server::ServerHandler;
58use rmcp::model::{
59    CallToolRequestParams as CallToolRequestParam, CallToolResult, Content, Implementation,
60    InitializeRequestParams, InitializeResult, ListToolsResult,
61    PaginatedRequestParams as PaginatedRequestParam, ProtocolVersion,
62    ServerCapabilities, ServerInfo, Tool,
63};
64use rmcp::service::{RequestContext, RoleServer};
65use rmcp::{ErrorData as McpError, ServiceExt};
66use serde::{Deserialize, Serialize};
67use solo_core::{
68    Confidence, DocumentId, EncodingContext, Episode, MemoryId, Tier,
69};
70use solo_storage::{TenantHandle, TenantRegistry};
71use std::str::FromStr;
72
73/// The MCP server. Cheap to clone — every field is `Arc`-cloneable.
74///
75/// v0.8.0 P2: an MCP session resolves to **one tenant**. The session's
76/// `tenant_handle` is resolved at `initialize` time (today: from the
77/// CLI invocation via `solo mcp-stdio --tenant <id>`; future versions
78/// may resolve per-bearer-token via OIDC). Subsequent `tools/call`
79/// invocations route through the cached handle without re-resolving.
80/// Operators that need multi-tenant MCP spawn one `solo mcp-stdio`
81/// subprocess per tenant.
82#[derive(Clone)]
83pub struct SoloMcpServer {
84    inner: Arc<Inner>,
85}
86
87struct Inner {
88    /// Multi-tenant registry shared across all sessions. Held so that a
89    /// future MCP capability that lists/inspects other tenants has a
90    /// path to them (out of scope for v0.8.0 P2). P3 (auth) will use
91    /// this to re-resolve the tenant from a bearer-token claim.
92    #[allow(dead_code)]
93    registry: Arc<TenantRegistry>,
94    /// The tenant this MCP session speaks for. Resolved at session
95    /// construction time.
96    tenant: Arc<TenantHandle>,
97    /// Read-path aliases for the canonical `"user"` subject. Sourced
98    /// from `solo.config.toml` `[identity] user_aliases`; threaded
99    /// through to `solo_query::facts_about` so a query for `"alex"`
100    /// also surfaces rows historically extracted as `"user"`. Empty
101    /// vec = behave as today (no expansion).
102    user_aliases: Vec<String>,
103    /// v0.8.0 P4 audit-log principal for this MCP session. MCP is
104    /// bearer-only (no OIDC story in the spec), so the principal is
105    /// effectively `"bearer"` when the daemon was started with
106    /// `--bearer-token-file` and `None` otherwise. Persisted here so
107    /// every tool dispatch threads it into the audit emit without
108    /// reconstructing it per call.
109    audit_principal: Option<String>,
110}
111
/// v0.9.0 P2: what MCP `initialize` should do, given the tenant's
/// `[llm]` config and whether the peer advertised the `sampling`
/// capability.
///
/// The decision is modelled as a plain value, separate from the code
/// that writes the steward slot, so the gating table can be unit-tested
/// without a real `rmcp::Peer<RoleServer>` (whose constructors are
/// private). `SoloMcpServer::initialize` matches on this value and
/// performs the side effect.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InitializeDecision {
    /// The tenant's LLM backend needs no MCP peer: the steward slot was
    /// already populated at registry-open time (or legitimately stays
    /// `None` for `LlmConfig::None`). Initialize succeeds and leaves
    /// the slot untouched.
    Allow,
    /// The backend is `mcp_sampling` and the peer advertised
    /// `sampling`: `populate_sampling_steward` must write a peer-bound
    /// Steward into the slot.
    PopulateSamplingSteward,
    /// The backend is `mcp_sampling` but the peer did NOT advertise
    /// `sampling`: initialize must fail with the locked BLOCKER 2
    /// error message.
    RejectMissingSamplingCapability,
}
136
137/// v0.9.0 P2: decide the initialize outcome given the tenant's
138/// `[llm]` config and whether the peer advertised the `sampling`
139/// capability.
140///
141/// Pure function — no side effects, no rmcp peer required. Pinned by
142/// `initialize_decision_*` tests.
143pub fn initialize_decision(
144    llm_settings: &Option<solo_storage::LlmSettings>,
145    peer_sampling_supported: bool,
146) -> InitializeDecision {
147    match llm_settings {
148        Some(settings) if settings.requires_mcp_peer() => {
149            if peer_sampling_supported {
150                InitializeDecision::PopulateSamplingSteward
151            } else {
152                InitializeDecision::RejectMissingSamplingCapability
153            }
154        }
155        _ => InitializeDecision::Allow,
156    }
157}
158
/// v0.9.0 P2: the locked error text shown when `mcp_sampling` is
/// configured but no sampling-capable MCP peer is available (plan §3
/// Decision 4 / BLOCKER 2 resolution). Used by both the daemon-startup
/// rejection guard and the MCP `initialize` capability gate.
///
/// The message is returned verbatim to the operator so the TOML
/// snippets it contains can be copy-pasted. It is defined once at
/// module scope so the daemon startup path (in `solo-cli`) and
/// `SoloMcpServer::initialize` share a single source of truth.
///
/// Returns the lines joined with `\n`, with no trailing newline.
pub fn sampling_capability_missing_error_message() -> String {
    // One entry per output line; empty entries are blank separator lines.
    const LINES: [&str; 30] = [
        "LLM backend `mcp_sampling` requires a connected MCP client that",
        "advertises the `sampling` capability at initialize. Either the",
        "current MCP client does not support sampling, or this Solo",
        "process is running in daemon-only mode (no peer to call back).",
        "",
        "Pick one of:",
        "",
        "  # Anthropic (hosted):",
        "  [llm]",
        "  mode = \"anthropic\"",
        "  api_key_env = \"ANTHROPIC_API_KEY\"",
        "  model = \"claude-sonnet-4-6\"",
        "",
        "  # OpenAI (hosted):",
        "  [llm]",
        "  mode = \"openai\"",
        "  api_key_env = \"OPENAI_API_KEY\"",
        "  model = \"gpt-5o\"",
        "",
        "  # Ollama (local daemon):",
        "  [llm]",
        "  mode = \"ollama\"",
        "  base_url = \"http://localhost:11434\"",
        "  model = \"qwen3-coder:30b\"",
        "",
        "  # None (cluster-only; abstractions skipped):",
        "  [llm]",
        "  mode = \"none\"",
        "",
        "See docs/releases/v0.9.0.md \u{00a7}LLM-backend selection for details.",
    ];

    let mut message = String::new();
    for (index, line) in LINES.iter().enumerate() {
        // Separator goes between lines only, never after the last one.
        if index > 0 {
            message.push('\n');
        }
        message.push_str(line);
    }
    message
}
203
/// v0.8.1 P2: name of the environment variable an MCP client sets when
/// launching the server process to attribute audit rows on the stdio
/// transport. Closes the v0.8.0 known-issue gap where MCP audit rows
/// always carried `principal_subject = NULL` on the daemon path.
///
/// Precedence (once the future HTTP-MCP transport lands):
///   1. `Authorization: Bearer <token>` header on the HTTP-MCP request.
///   2. `SOLO_MCP_PRINCIPAL_TOKEN` env var on the spawned process.
///
/// In the v0.8.x stdio-only world only the env-var path applies. The
/// constant lives at module scope so external callers (CLI subcommand,
/// tests) reference it by name instead of re-typing the literal.
pub const ENV_MCP_PRINCIPAL_TOKEN: &str = "SOLO_MCP_PRINCIPAL_TOKEN";

/// v0.8.1 P2: resolve the audit principal for an MCP session.
///
/// # Parameters
///
/// * `header_value` — the raw `Authorization` header value on the
///   (future) HTTP-MCP path, e.g. `"Bearer abc123"`. Pass `None` on the
///   stdio transport.
///
/// # Returns
///
/// * The trimmed bearer token from `header_value`, when the header uses
///   the `Bearer` scheme and carries a non-blank token. The header
///   beats the env var when both are present.
/// * Otherwise the trimmed value of `SOLO_MCP_PRINCIPAL_TOKEN`, when it
///   is set, valid Unicode, and non-blank.
/// * `None` when neither source yields a value. Blank / whitespace-only
///   values count as absent so an accidental
///   `SOLO_MCP_PRINCIPAL_TOKEN=""` in a launcher script does not pin
///   every audit row to an empty principal.
///
/// The auth scheme is matched case-insensitively (`bearer`, `BEARER`,
/// … are accepted), as HTTP requires for authentication scheme names.
/// A header with any other scheme, or with a blank token, is ignored
/// and resolution falls through to the env var.
///
/// The resolved value is used as the principal subject directly; no
/// validation against configured tokens happens here — the env var is
/// operator-trusted (same trust model as `SOLO_PASSPHRASE`).
pub fn resolve_mcp_principal(header_value: Option<&str>) -> Option<String> {
    // HTTP-MCP path: `<scheme> <token>`. Splitting at the first space
    // keeps the original "scheme followed by exactly one space" shape
    // while letting the scheme comparison ignore ASCII case.
    let header_token = header_value
        .and_then(|header| header.split_once(' '))
        .filter(|(scheme, _)| scheme.eq_ignore_ascii_case("Bearer"))
        .map(|(_, token)| token.trim())
        .filter(|token| !token.is_empty());
    if let Some(token) = header_token {
        return Some(token.to_string());
    }

    // Stdio fallback: unset, non-Unicode and blank values all mean
    // "no principal".
    std::env::var(ENV_MCP_PRINCIPAL_TOKEN)
        .ok()
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty())
}
264
265impl SoloMcpServer {
266    /// Build a server speaking for `tenant` (v0.8.0 P2 — one MCP session
267    /// ↔ one tenant). The registry is held so future capabilities can
268    /// reach across tenants if needed; today every handler routes
269    /// through `self.inner.tenant`.
270    ///
271    /// v0.8.1 P2: auto-resolves the audit principal from the
272    /// `SOLO_MCP_PRINCIPAL_TOKEN` env var (see [`resolve_mcp_principal`]).
273    /// When neither the env var nor a header is set, the principal stays
274    /// `None` — preserving v0.8.0 behavior for single-user setups.
275    pub fn new_for_tenant(
276        registry: Arc<TenantRegistry>,
277        tenant: Arc<TenantHandle>,
278        user_aliases: Vec<String>,
279    ) -> Self {
280        let principal = resolve_mcp_principal(None);
281        Self::new_for_tenant_with_principal(registry, tenant, user_aliases, principal)
282    }
283
284    /// v0.8.0 P4: like [`Self::new_for_tenant`], but records an explicit
285    /// audit principal subject for every tool dispatch. MCP is
286    /// bearer-only at v0.8.0 — the orchestration layer (today: the
287    /// daemon's `--bearer-token-file` path) decides whether a session
288    /// counts as "bearer-authenticated" and passes `Some("bearer")`;
289    /// CLI / unauth paths pass `None`.
290    ///
291    /// v0.8.1 P2: when the caller passes `audit_principal = None`, the
292    /// env-var auto-resolution still runs (in `new_for_tenant`). Callers
293    /// who want to *explicitly* suppress env-var resolution can call
294    /// this method with `None` after `std::env::remove_var(...)`, or use
295    /// the dedicated test constructor that bypasses env reads.
296    pub fn new_for_tenant_with_principal(
297        registry: Arc<TenantRegistry>,
298        tenant: Arc<TenantHandle>,
299        user_aliases: Vec<String>,
300        audit_principal: Option<String>,
301    ) -> Self {
302        Self {
303            inner: Arc::new(Inner {
304                registry,
305                tenant,
306                user_aliases,
307                audit_principal,
308            }),
309        }
310    }
311}
312
313/// Convenience: run the server over stdio and await its termination.
314/// Returns when stdin closes (parent disconnect) or the runtime exits.
315pub async fn serve_stdio(server: SoloMcpServer) -> anyhow::Result<()> {
316    use rmcp::transport::io::stdio;
317    let (stdin, stdout) = stdio();
318    let running = server.serve((stdin, stdout)).await?;
319    running.waiting().await?;
320    Ok(())
321}
322
323// ---------------------------------------------------------------------------
324// Tool argument schemas
325// ---------------------------------------------------------------------------
326
327#[derive(Debug, Clone, Serialize, Deserialize)]
328pub struct RememberArgs {
329    pub content: String,
330    #[serde(default)]
331    pub source_type: Option<String>,
332    #[serde(default)]
333    pub source_id: Option<String>,
334}
335
336#[derive(Debug, Clone, Serialize, Deserialize)]
337pub struct RecallArgs {
338    pub query: String,
339    #[serde(default = "default_limit")]
340    pub limit: usize,
341}
342
/// Serde default for the `limit` field of the recall, themes,
/// facts-about and contradictions argument structs.
fn default_limit() -> usize {
    const DEFAULT_RESULT_LIMIT: usize = 5;
    DEFAULT_RESULT_LIMIT
}
346
347#[derive(Debug, Clone, Serialize, Deserialize)]
348pub struct ForgetArgs {
349    pub memory_id: String,
350    #[serde(default = "default_forget_reason")]
351    pub reason: String,
352}
353
/// Serde default for [`ForgetArgs::reason`] when the caller gives none.
fn default_forget_reason() -> String {
    String::from("user-initiated via MCP")
}
357
358#[derive(Debug, Clone, Serialize, Deserialize)]
359pub struct InspectArgs {
360    pub memory_id: String,
361}
362
363// Path 1 derived-layer tools (v0.4.0+) — query the Steward's outputs.
364// `solo_query::derived` is the single source of truth; these handlers
365// just translate JSON args to function args and serialise the result
366// vec to JSON for the MCP wire.
367
368#[derive(Debug, Clone, Serialize, Deserialize)]
369pub struct ThemesArgs {
370    /// Optional time window in days; `None` = unfiltered, return up
371    /// to `limit` most-recent themes across all time. `Some(7)` =
372    /// "themes from the last week".
373    #[serde(default)]
374    pub window_days: Option<i64>,
375    #[serde(default = "default_limit")]
376    pub limit: usize,
377}
378
379#[derive(Debug, Clone, Serialize, Deserialize)]
380pub struct FactsAboutArgs {
381    /// Subject id to query — required (predicate-only scans
382    /// intentionally not supported).
383    pub subject: String,
384    #[serde(default)]
385    pub predicate: Option<String>,
386    #[serde(default)]
387    pub since_ms: Option<i64>,
388    #[serde(default)]
389    pub until_ms: Option<i64>,
390    /// v0.5.1 Priority 8 — widen the query to also match rows where
391    /// `subject` appears as the object (e.g. surface "Sam pushes back
392    /// on PRs about Maya" under `facts_about(subject="maya")`).
393    /// Default `false` preserves v0.5.0 behaviour.
394    #[serde(default)]
395    pub include_as_object: bool,
396    #[serde(default = "default_limit")]
397    pub limit: usize,
398}
399
400#[derive(Debug, Clone, Serialize, Deserialize)]
401pub struct ContradictionsArgs {
402    #[serde(default = "default_limit")]
403    pub limit: usize,
404}
405
406/// Args for `memory_inspect_cluster` (v0.5.0 Priority 3). `cluster_id`
407/// is required; `full_content` is opt-in for the rare power-user case
408/// where 200-char-per-episode truncation is too aggressive.
409#[derive(Debug, Clone, Serialize, Deserialize)]
410pub struct InspectClusterArgs {
411    pub cluster_id: String,
412    /// If `true`, episode `content` fields are returned verbatim. If
413    /// `false` or omitted (the default), each episode's content is
414    /// truncated to `solo_query::EPISODE_TRUNCATE_CHARS` chars with a
415    /// trailing `…`.
416    #[serde(default)]
417    pub full_content: bool,
418}
419
420// Document tools (v0.7.0+). Five args structs paired with five handlers.
421// Wire shapes per `docs/dev-log/0083-v0.7.0-implementation-plan.md` §2 P5.
422
423#[derive(Debug, Clone, Serialize, Deserialize)]
424pub struct IngestDocumentArgs {
425    /// Server-side filesystem path to the file to ingest. Must be
426    /// readable by the Solo process. The writer parses the file by
427    /// extension, splits it into ~500-token chunks, embeds each, and
428    /// stores them under `documents` + `document_chunks`.
429    pub path: String,
430}
431
432#[derive(Debug, Clone, Serialize, Deserialize)]
433pub struct SearchDocsArgs {
434    pub query: String,
435    #[serde(default = "default_search_docs_limit")]
436    pub limit: usize,
437}
438
/// Serde default for [`SearchDocsArgs::limit`].
fn default_search_docs_limit() -> usize {
    const DEFAULT_SEARCH_DOCS_LIMIT: usize = 5;
    DEFAULT_SEARCH_DOCS_LIMIT
}
442
443#[derive(Debug, Clone, Serialize, Deserialize)]
444pub struct InspectDocumentArgs {
445    pub doc_id: String,
446}
447
448#[derive(Debug, Clone, Serialize, Deserialize)]
449pub struct ListDocumentsArgs {
450    #[serde(default = "default_list_documents_limit")]
451    pub limit: usize,
452    #[serde(default)]
453    pub offset: usize,
454    /// If `true`, also include documents the user has forgotten. Default
455    /// `false` matches the agent-UX expectation that recall + listing
456    /// ignore soft-deleted rows.
457    #[serde(default)]
458    pub include_forgotten: bool,
459}
460
/// Serde default for [`ListDocumentsArgs::limit`].
fn default_list_documents_limit() -> usize {
    const DEFAULT_PAGE_SIZE: usize = 20;
    DEFAULT_PAGE_SIZE
}
464
465#[derive(Debug, Clone, Serialize, Deserialize)]
466pub struct ForgetDocumentArgs {
467    pub doc_id: String,
468}
469
470// ---------------------------------------------------------------------------
471// ServerHandler implementation
472// ---------------------------------------------------------------------------
473
474impl ServerHandler for SoloMcpServer {
475    fn get_info(&self) -> ServerInfo {
476        // rmcp 1.x: ServerInfo is non-exhaustive AND lives in another crate,
477        // so neither struct-literal nor functional-update syntax (..) is
478        // allowed from outside. Build via mut on a Default::default().
479        let capabilities = ServerCapabilities::builder()
480            .enable_tools()
481            .build();
482        let mut info = ServerInfo::default();
483        info.protocol_version = ProtocolVersion::default();
484        info.capabilities = capabilities;
485        // v0.9.1 P1 Fix 1 — `Implementation::from_build_env()` reads
486        // `CARGO_PKG_NAME` + `CARGO_PKG_VERSION` from rmcp's OWN build
487        // environment (the helper lives in rmcp, so the proc-macro
488        // expansion captures rmcp's manifest, not ours). On v0.9.0 every
489        // Solo MCP daemon self-identified as `{name: "rmcp", version: "1.7.0"}`.
490        // Pinned by `tests::server_info_identity_is_solo_not_rmcp_or_solo_api`.
491        // The literal `"solo"` (not `env!("CARGO_PKG_NAME")`) is deliberate:
492        // this crate is `solo-api`, but the operator-facing identity is
493        // the binary name `solo`.
494        info.server_info = Implementation::new(
495            "solo".to_string(),
496            env!("CARGO_PKG_VERSION").to_string(),
497        );
498        info.instructions = Some(
499            "Solo gives you persistent memory across conversations \
500                 with this user — what they've told you before, the \
501                 people and projects in their life, and where their \
502                 stated beliefs have shifted, plus a library of \
503                 documents the user has ingested (notes, runbooks, \
504                 PDFs). Reach for these tools whenever the user \
505                 references something from earlier (\"like I \
506                 mentioned\", \"the project I'm working on\", \"my \
507                 friend Alex\", \"the notes I uploaded last week\") \
508                 or asks a question that hinges on personal context \
509                 or document content you don't have in the current \
510                 chat. \
511                 \n\nTools to write or look up specific moments: \
512                 memory_remember (save something worth keeping), \
513                 memory_recall (search past conversations by topic), \
514                 memory_inspect (show one saved item by id), \
515                 memory_forget (delete one saved item). \
516                 \n\nTools for the bigger picture (populated as the \
517                 user uses Solo over time): memory_themes (recent \
518                 topics they've been thinking about), \
519                 memory_facts_about (what you know about a person, \
520                 project, or place — \"what do you know about \
521                 Alex?\"), memory_contradictions (places where the \
522                 user has said two things that disagree — surface \
523                 these before answering), memory_inspect_cluster \
524                 (the raw conversations behind one summary). \
525                 \n\nTools for the user's documents: \
526                 memory_ingest_document (read a file from disk and \
527                 add it to Solo's library), memory_search_docs \
528                 (search across ingested documents by topic — use \
529                 when the user asks about something they wrote down \
530                 or saved as a file), memory_inspect_document (show \
531                 one document's metadata plus a preview of its \
532                 chunks), memory_list_documents (browse documents \
533                 by recency), memory_forget_document (drop a \
534                 document from the library)."
535                .into(),
536        );
537        info
538    }
539
540    /// v0.9.0 P2: override `initialize` so we can:
541    ///
542    ///   1. Cache the client's `InitializeRequestParams` on the peer
543    ///      (delegates to rmcp's default for this).
544    ///   2. If the tenant's `[llm] mode = "mcp_sampling"`:
545    ///      a. Refuse to initialize when the peer didn't advertise the
546    ///         `sampling` capability — surfaces the BLOCKER 2-locked
547    ///         error message so the user sees commented-out
548    ///         alternative TOML blocks.
549    ///      b. Otherwise build a `SamplingLlmClient`-backed Steward and
550    ///         write it into `tenant.steward_slot()` so the writer
551    ///         actor's next consolidate-tick reads a populated slot.
552    ///   3. For any other `[llm]` mode, return the configured tools
553    ///      surface unchanged (the slot was eagerly populated at
554    ///      registry-open time by the static StewardFactory).
555    async fn initialize(
556        &self,
557        request: InitializeRequestParams,
558        context: RequestContext<RoleServer>,
559    ) -> std::result::Result<InitializeResult, McpError> {
560        // Defer to rmcp's default for peer-info caching (matches the
561        // `if peer_info().is_none()` shape).
562        if context.peer.peer_info().is_none() {
563            context.peer.set_peer_info(request.clone());
564        }
565
566        let llm_settings =
567            self.inner.tenant.config().llm.as_ref().cloned();
568        let peer_sampling_supported =
569            request.capabilities.sampling.is_some();
570        match initialize_decision(&llm_settings, peer_sampling_supported) {
571            InitializeDecision::Allow => {}
572            InitializeDecision::PopulateSamplingSteward => {
573                // Build the sampling-backed Steward against the live
574                // peer + the per-tenant write handle, then write it
575                // into the slot.
576                self.populate_sampling_steward(&context).await;
577            }
578            InitializeDecision::RejectMissingSamplingCapability => {
579                return Err(McpError::invalid_request(
580                    sampling_capability_missing_error_message(),
581                    None,
582                ));
583            }
584        }
585
586        Ok(self.get_info())
587    }
588
589    async fn list_tools(
590        &self,
591        _request: Option<PaginatedRequestParam>,
592        _context: RequestContext<RoleServer>,
593    ) -> std::result::Result<ListToolsResult, McpError> {
594        Ok(ListToolsResult {
595            tools: build_tools(),
596            next_cursor: None,
597            ..Default::default()
598        })
599    }
600
601    async fn call_tool(
602        &self,
603        request: CallToolRequestParam,
604        _context: RequestContext<RoleServer>,
605    ) -> std::result::Result<CallToolResult, McpError> {
606        let CallToolRequestParam { name, arguments, .. } = request;
607        let args_value = serde_json::Value::Object(arguments.unwrap_or_default());
608        self.dispatch_tool(&name, args_value).await
609    }
610}
611
612impl SoloMcpServer {
613    /// v0.9.0 P2: build a sampling-backed `Arc<Steward>` for the
614    /// current MCP session and write it into the tenant's
615    /// `steward_slot`. Called from [`Self::initialize`] when:
616    ///
617    ///   * `tenant.config().llm.requires_mcp_peer()` is true, AND
618    ///   * the peer advertised the `sampling` capability.
619    ///
620    /// Implementation notes:
621    ///
622    ///   * `StewardConfig::from_env()` is parsed best-effort; if the
623    ///     env vars are malformed, we fall back to `default()` and
624    ///     log a warning. This matches `daemon.rs`'s tolerance — a
625    ///     bad env var shouldn't block an MCP session from initialising.
626    ///
627    ///   * The slot is OVERWRITTEN unconditionally — a fresh MCP
628    ///     session always wins. If a prior session's
629    ///     `SamplingLlmClient` had outstanding requests, they error out
630    ///     on the rmcp layer when their peer drops.
631    ///
632    ///   * The cached `audit_principal` is the one the MCP server
633    ///     constructed for this session via `resolve_mcp_principal`.
634    ///     Every `peer.create_message` call from this Steward routes
635    ///     that principal through to the per-tenant
636    ///     `AuditOperation::LlmSamplingCall` row.
637    async fn populate_sampling_steward(
638        &self,
639        context: &RequestContext<RoleServer>,
640    ) {
641        let steward_config = solo_steward::StewardConfig::from_env()
642            .unwrap_or_else(|e| {
643                tracing::warn!(
644                    error = %e,
645                    "v0.9.0 P2: StewardConfig::from_env failed at MCP \
646                     initialize; falling back to defaults"
647                );
648                solo_steward::StewardConfig::default()
649            });
650        // v0.9.0 P5 (M3 wiring): read `[sampling]` from the tenant's
651        // already-parsed `SoloConfig`. `SamplingConfig::default()` lands
652        // when the block is omitted (5s window / 10 max-batch); operator
653        // overrides flow through to `build_sampling_steward` and into
654        // `SamplingCoordinator::with_settings`.
655        let sampling_config = self.inner.tenant.config().sampling.clone();
656        let peer = context.peer.clone();
657        let write_handle = self.inner.tenant.write().clone();
658        let steward = crate::llm::build_sampling_steward(
659            peer,
660            write_handle,
661            self.inner.audit_principal.clone(),
662            steward_config,
663            sampling_config.clone(),
664        );
665        let slot = self.inner.tenant.steward_slot();
666        let mut guard = slot.write().await;
667        *guard = Some(steward);
668        tracing::info!(
669            tenant = %self.inner.tenant.tenant_id(),
670            coalesce_window_ms = sampling_config.coalesce_window_ms,
671            coalesce_max_requests = sampling_config.coalesce_max_requests,
672            "v0.9.0 P5: MCP-sampling Steward attached to tenant.steward_slot \
673             (PeerSamplingClient → SamplingCoordinator → SamplingLlmClient)"
674        );
675    }
676
677    /// Direct tool-dispatch path used by both `call_tool` (the
678    /// ServerHandler trait method, behind the rmcp protocol layer) and
679    /// in-process tests that don't want to spin up a full transport pair.
680    /// Bypasses `RequestContext` (which requires a `Peer` not constructible
681    /// outside rmcp internals).
682    pub async fn dispatch_tool(
683        &self,
684        name: &str,
685        args_value: serde_json::Value,
686    ) -> std::result::Result<CallToolResult, McpError> {
687        match name {
688            "memory_remember" => {
689                let args: RememberArgs = parse_args(&args_value)?;
690                self.handle_remember(args).await
691            }
692            "memory_recall" => {
693                let args: RecallArgs = parse_args(&args_value)?;
694                self.handle_recall(args).await
695            }
696            "memory_forget" => {
697                let args: ForgetArgs = parse_args(&args_value)?;
698                self.handle_forget(args).await
699            }
700            "memory_inspect" => {
701                let args: InspectArgs = parse_args(&args_value)?;
702                self.handle_inspect(args).await
703            }
704            "memory_themes" => {
705                let args: ThemesArgs = parse_args(&args_value)?;
706                self.handle_themes(args).await
707            }
708            "memory_facts_about" => {
709                let args: FactsAboutArgs = parse_args(&args_value)?;
710                self.handle_facts_about(args).await
711            }
712            "memory_contradictions" => {
713                let args: ContradictionsArgs = parse_args(&args_value)?;
714                self.handle_contradictions(args).await
715            }
716            "memory_inspect_cluster" => {
717                let args: InspectClusterArgs = parse_args(&args_value)?;
718                self.handle_inspect_cluster(args).await
719            }
720            "memory_ingest_document" => {
721                let args: IngestDocumentArgs = parse_args(&args_value)?;
722                self.handle_ingest_document(args).await
723            }
724            "memory_search_docs" => {
725                let args: SearchDocsArgs = parse_args(&args_value)?;
726                self.handle_search_docs(args).await
727            }
728            "memory_inspect_document" => {
729                let args: InspectDocumentArgs = parse_args(&args_value)?;
730                self.handle_inspect_document(args).await
731            }
732            "memory_list_documents" => {
733                let args: ListDocumentsArgs = parse_args(&args_value)?;
734                self.handle_list_documents(args).await
735            }
736            "memory_forget_document" => {
737                let args: ForgetDocumentArgs = parse_args(&args_value)?;
738                self.handle_forget_document(args).await
739            }
740            other => Err(McpError::invalid_params(
741                format!("unknown tool `{other}`"),
742                None,
743            )),
744        }
745    }
746
747    /// List the tools this server exposes. Mirrors `ServerHandler::list_tools`
748    /// without requiring a RequestContext.
749    pub fn dispatch_list_tools(&self) -> Vec<Tool> {
750        build_tools()
751    }
752}
753
754fn parse_args<T: serde::de::DeserializeOwned>(
755    v: &serde_json::Value,
756) -> std::result::Result<T, McpError> {
757    serde_json::from_value(v.clone()).map_err(|e| {
758        McpError::invalid_params(format!("invalid tool arguments: {e}"), None)
759    })
760}
761
762fn solo_to_mcp(e: solo_core::Error) -> McpError {
763    use solo_core::Error;
764    match e {
765        Error::NotFound(msg) => McpError::invalid_params(msg, None),
766        Error::InvalidInput(msg) => McpError::invalid_params(msg, None),
767        Error::Conflict(msg) => McpError::invalid_params(msg, None),
768        other => McpError::internal_error(other.to_string(), None),
769    }
770}
771
772// ---------------------------------------------------------------------------
773// Tool definitions (JSON Schema)
774// ---------------------------------------------------------------------------
775
776fn build_tools() -> Vec<Tool> {
777    vec![
778        Tool::new(
779            "memory_remember",
780            "Save something the user has told you — a fact, a \
781             preference, a name, a date, a context — so you can pick \
782             it up next conversation. Use whenever the user mentions \
783             something they'd reasonably expect you to recall later \
784             (\"I just started at Quotient\", \"my partner is Maya\"). \
785             Returns the saved item's id.",
786            json_schema_object(serde_json::json!({
787                "type": "object",
788                "properties": {
789                    "content": {
790                        "type": "string",
791                        "description": "The text to remember.",
792                    },
793                    "source_type": {
794                        "type": "string",
795                        "description": "Optional source-type tag (default: \"user_message\").",
796                    },
797                    "source_id": {
798                        "type": "string",
799                        "description": "Optional upstream id for traceability.",
800                    },
801                },
802                "required": ["content"],
803            })),
804        ),
805        Tool::new(
806            "memory_recall",
807            "Search past conversations with this user by topic or \
808             phrase. Returns up to `limit` of the closest matches, \
809             best match first. Use when the user references \
810             something they said before (\"that book I told you \
811             about\", \"the bug we were debugging last week\"). \
812             Skips items the user has deleted.",
813            json_schema_object(serde_json::json!({
814                "type": "object",
815                "properties": {
816                    "query": {
817                        "type": "string",
818                        "description": "The query text.",
819                    },
820                    "limit": {
821                        "type": "integer",
822                        "description": "Maximum results (default 5).",
823                        "minimum": 1,
824                        "maximum": 100,
825                    },
826                },
827                "required": ["query"],
828            })),
829        ),
830        Tool::new(
831            "memory_forget",
832            "Delete one saved item by id. Use when the user asks you \
833             to forget something specific (\"forget that I said \
834             X\"). The item stops appearing in future recalls. \
835             Reversible only via backups.",
836            json_schema_object(serde_json::json!({
837                "type": "object",
838                "properties": {
839                    "memory_id": {
840                        "type": "string",
841                        "description": "MemoryId to forget (UUID v7).",
842                    },
843                    "reason": {
844                        "type": "string",
845                        "description": "Optional free-form reason (logged, not yet persisted).",
846                    },
847                },
848                "required": ["memory_id"],
849            })),
850        ),
851        Tool::new(
852            "memory_inspect",
853            "Show the full record for one saved item — when it was \
854             saved, where it came from, and the full text. Use after \
855             memory_recall when you want the complete content of a \
856             specific hit (recall results may be truncated).",
857            json_schema_object(serde_json::json!({
858                "type": "object",
859                "properties": {
860                    "memory_id": {
861                        "type": "string",
862                        "description": "MemoryId to inspect (UUID v7).",
863                    },
864                },
865                "required": ["memory_id"],
866            })),
867        ),
868        // Path 1 derived-layer tools (v0.4.0+) — query the Steward's
869        // outputs. These four are populated by `solo consolidate` and
870        // were previously unreadable except via direct SQL.
871        Tool::new(
872            "memory_themes",
873            "Recent topics the user has been thinking about. Use to \
874             orient yourself at the start of a conversation, or when \
875             the user asks \"what have I been up to\" / \"what was I \
876             working on last week\". Pass `window_days` to scope \
877             (e.g. 7 for last week); omit for all-time.",
878            json_schema_object(serde_json::json!({
879                "type": "object",
880                "properties": {
881                    "window_days": {
882                        "type": "integer",
883                        "description": "Optional time window in days. Omit for unfiltered.",
884                        "minimum": 1,
885                    },
886                    "limit": {
887                        "type": "integer",
888                        "description": "Maximum results (default 5).",
889                        "minimum": 1,
890                        "maximum": 100,
891                    },
892                },
893            })),
894        ),
895        Tool::new(
896            "memory_facts_about",
897            "Look up what you remember about a person, project, or \
898             topic — names, dates, preferences, relationships. Use \
899             when the user asks \"what do you know about Alex?\", \
900             \"when did I start at Quotient?\", \"who is Maya?\", or \
901             whenever you need grounded facts about someone or \
902             something before answering. Subject is required (the \
903             person/place/thing you're asking about); narrow further \
904             with `predicate` (\"works_at\", \"lives_in\") or a date \
905             range. Set `include_as_object=true` to also surface \
906             facts where the subject appears on the receiving side of \
907             a relationship (e.g. \"Sam pushes back on PRs about \
908             Maya\" surfaces under facts_about(subject=\"Maya\", \
909             include_as_object=true)). (Backed by \
910             subject-predicate-object triples distilled from past \
911             conversations.) Clients should set a 30s timeout on this \
912             call; if exceeded, retry once or fall back to \
913             `memory_recall`.",
914            json_schema_object(serde_json::json!({
915                "type": "object",
916                "properties": {
917                    "subject": {
918                        "type": "string",
919                        "description": "Subject id to query (e.g. 'Sam').",
920                    },
921                    "predicate": {
922                        "type": "string",
923                        "description": "Optional predicate filter (e.g. 'works_at').",
924                    },
925                    "since_ms": {
926                        "type": "integer",
927                        "description": "Optional valid_from_ms lower bound (epoch ms).",
928                    },
929                    "until_ms": {
930                        "type": "integer",
931                        "description": "Optional valid_to_ms upper bound (epoch ms). NULL upper bounds (still-valid facts) pass through.",
932                    },
933                    "include_as_object": {
934                        "type": "boolean",
935                        "description": "If true, also match facts where `subject` appears as the object (e.g. 'Sam pushes back on PRs about Maya' surfaces under subject='Maya'). Default false.",
936                        "default": false,
937                    },
938                    "limit": {
939                        "type": "integer",
940                        "description": "Maximum results (default 5).",
941                        "minimum": 1,
942                        "maximum": 100,
943                    },
944                },
945                "required": ["subject"],
946            })),
947        ),
948        Tool::new(
949            "memory_contradictions",
950            "Find places where the user's stated beliefs or facts \
951             disagree across conversations — flag disagreements \
952             before answering. Use whenever you're about to rely on \
953             a remembered fact that could have changed (jobs, \
954             relationships, preferences, opinions); a disagreement \
955             here means the user has told you both X and not-X over \
956             time and you should ask which is current instead of \
957             guessing. Each result shows both conflicting statements \
958             with the topic.",
959            json_schema_object(serde_json::json!({
960                "type": "object",
961                "properties": {
962                    "limit": {
963                        "type": "integer",
964                        "description": "Maximum results (default 5).",
965                        "minimum": 1,
966                        "maximum": 100,
967                    },
968                },
969            })),
970        ),
971        Tool::new(
972            "memory_inspect_cluster",
973            "Show the raw conversations behind one summary. Returns \
974             the one-line topic (the LLM-generated summary) and the \
975             source conversations the topic was built from. Use \
976             after memory_themes when the user asks \"show me the \
977             raw context behind this\" or \"why does Solo think \
978             that about cluster Y\". Source items are truncated to \
979             200 chars unless `full_content` is set.",
980            json_schema_object(serde_json::json!({
981                "type": "object",
982                "properties": {
983                    "cluster_id": {
984                        "type": "string",
985                        "description": "Cluster id to inspect (from memory_themes hits).",
986                    },
987                    "full_content": {
988                        "type": "boolean",
989                        "description": "If true, episode content is returned verbatim. Default false (truncate to 200 chars + ellipsis).",
990                    },
991                },
992                "required": ["cluster_id"],
993            })),
994        ),
995        // Document tools (v0.7.0+). RAG over user-supplied files —
996        // markdown notes, PDFs, runbooks, code, etc. Same vector space
997        // as episodes; same embedder; same HNSW index.
998        Tool::new(
999            "memory_ingest_document",
1000            "Read a file from disk and add it to the user's document \
1001             library so it becomes searchable alongside past \
1002             conversations. Use when the user asks you to remember a \
1003             whole file (\"add my notes/runbook.md\", \"ingest this \
1004             PDF\"). The file is split into ~500-token chunks and \
1005             each chunk is embedded; chunks then surface through \
1006             memory_search_docs. Returns the new document id, chunk \
1007             count, and a `deduped` flag (true if the same content \
1008             was already ingested under another id).",
1009            json_schema_object(serde_json::json!({
1010                "type": "object",
1011                "properties": {
1012                    "path": {
1013                        "type": "string",
1014                        "description": "Server-side absolute path to the file to ingest. The file must be readable by the Solo process.",
1015                    },
1016                },
1017                "required": ["path"],
1018            })),
1019        ),
1020        Tool::new(
1021            "memory_search_docs",
1022            "Search across the user's ingested documents by topic or \
1023             phrase. Returns up to `limit` matching chunks, best \
1024             match first, each with the parent document's title + \
1025             source path so you can cite where the answer came from. \
1026             Use when the user asks a question that hinges on \
1027             material they've added as a file (\"what does my \
1028             runbook say about backups?\", \"find the section in the \
1029             notes about the new policy\"). Forgotten documents are \
1030             skipped.",
1031            json_schema_object(serde_json::json!({
1032                "type": "object",
1033                "properties": {
1034                    "query": {
1035                        "type": "string",
1036                        "description": "The query text.",
1037                    },
1038                    "limit": {
1039                        "type": "integer",
1040                        "description": "Maximum results (default 5).",
1041                        "minimum": 1,
1042                        "maximum": 100,
1043                    },
1044                },
1045                "required": ["query"],
1046            })),
1047        ),
1048        Tool::new(
1049            "memory_inspect_document",
1050            "Show one document's metadata plus a preview of every \
1051             chunk it was split into. Use after memory_search_docs \
1052             when the user wants the bigger picture for one hit \
1053             (\"show me the whole document this came from\"), or \
1054             after memory_list_documents to drill into one entry. \
1055             Each chunk preview is truncated to 200 chars.",
1056            json_schema_object(serde_json::json!({
1057                "type": "object",
1058                "properties": {
1059                    "doc_id": {
1060                        "type": "string",
1061                        "description": "Document id to inspect (UUID v7).",
1062                    },
1063                },
1064                "required": ["doc_id"],
1065            })),
1066        ),
1067        Tool::new(
1068            "memory_list_documents",
1069            "List the user's ingested documents, newest first. Use \
1070             when the user asks \"what documents have I added?\" or \
1071             \"show me my files\". Returns a paginated index — pass \
1072             `offset` to page further back. Forgotten documents are \
1073             hidden by default; set `include_forgotten=true` to see \
1074             them too.",
1075            json_schema_object(serde_json::json!({
1076                "type": "object",
1077                "properties": {
1078                    "limit": {
1079                        "type": "integer",
1080                        "description": "Maximum results per page (default 20).",
1081                        "minimum": 1,
1082                        "maximum": 100,
1083                    },
1084                    "offset": {
1085                        "type": "integer",
1086                        "description": "Number of rows to skip (for paging). Default 0.",
1087                        "minimum": 0,
1088                    },
1089                    "include_forgotten": {
1090                        "type": "boolean",
1091                        "description": "If true, also include documents the user has forgotten. Default false.",
1092                    },
1093                },
1094            })),
1095        ),
1096        Tool::new(
1097            "memory_forget_document",
1098            "Drop one document from the user's library by id. Use \
1099             when the user asks you to forget a specific file \
1100             (\"forget my old runbook\"). The document's chunks stop \
1101             appearing in memory_search_docs and the vectors are \
1102             tombstoned in the index. The chunk rows themselves are \
1103             kept for forensic value (a future restore command can \
1104             undo this).",
1105            json_schema_object(serde_json::json!({
1106                "type": "object",
1107                "properties": {
1108                    "doc_id": {
1109                        "type": "string",
1110                        "description": "Document id to forget (UUID v7).",
1111                    },
1112                },
1113                "required": ["doc_id"],
1114            })),
1115        ),
1116    ]
1117}
1118
1119fn json_schema_object(value: serde_json::Value) -> serde_json::Map<String, serde_json::Value> {
1120    match value {
1121        serde_json::Value::Object(map) => map,
1122        _ => panic!("json_schema_object: input must be an object"),
1123    }
1124}
1125
/// Every tool name this server exposes, in registration order.
///
/// Meant for cross-crate consumers (notably `solo doctor
/// --check-mcp-compat`) that only need the names and should not have to
/// build full `rmcp::Tool` records, each of which allocates a JSON
/// schema. The order mirrors `build_tools()`; the two lists are separate
/// literals, so keep them in sync when adding or removing a tool.
pub fn tool_names() -> Vec<&'static str> {
    const REGISTERED: [&str; 13] = [
        // Episode tools:
        "memory_remember",
        "memory_recall",
        "memory_forget",
        "memory_inspect",
        // Derived-layer tools:
        "memory_themes",
        "memory_facts_about",
        "memory_contradictions",
        "memory_inspect_cluster",
        // Document tools added in v0.7.0:
        "memory_ingest_document",
        "memory_search_docs",
        "memory_inspect_document",
        "memory_list_documents",
        "memory_forget_document",
    ];
    REGISTERED.to_vec()
}
1152
1153// ---------------------------------------------------------------------------
1154// Tool handlers
1155// ---------------------------------------------------------------------------
1156
1157impl SoloMcpServer {
1158    async fn handle_remember(
1159        &self,
1160        args: RememberArgs,
1161    ) -> std::result::Result<CallToolResult, McpError> {
1162        let content = args.content.trim_end().to_string();
1163        if content.is_empty() {
1164            return Err(McpError::invalid_params(
1165                "memory_remember: content must not be empty".to_string(),
1166                None,
1167            ));
1168        }
1169        let embedding: solo_core::Embedding = self
1170            .inner
1171            .tenant
1172            .embedder()
1173            .embed(&content)
1174            .await
1175            .map_err(solo_to_mcp)?;
1176        let episode = Episode {
1177            memory_id: MemoryId::new(),
1178            ts_ms: chrono::Utc::now().timestamp_millis(),
1179            source_type: args.source_type.unwrap_or_else(|| "user_message".into()),
1180            source_id: args.source_id,
1181            content,
1182            encoding_context: EncodingContext::default(),
1183            provenance: None,
1184            confidence: Confidence::new(0.9).unwrap(),
1185            strength: 0.5,
1186            salience: 0.5,
1187            tier: Tier::Hot,
1188        };
1189        let mid = self
1190            .inner
1191            .tenant
1192            .write()
1193            .remember_as(self.inner.audit_principal.clone(), episode, embedding)
1194            .await
1195            .map_err(solo_to_mcp)?;
1196        Ok(CallToolResult::success(vec![Content::text(format!(
1197            "remembered {mid}"
1198        ))]))
1199    }
1200
1201    async fn handle_recall(
1202        &self,
1203        args: RecallArgs,
1204    ) -> std::result::Result<CallToolResult, McpError> {
1205        // Pipeline lives in solo-query; the transport just formats the
1206        // result. solo_query::run_recall validates empty queries
1207        // (returns InvalidInput → invalid_params via solo_to_mcp).
1208        let result = solo_query::run_recall(
1209            self.inner.tenant.as_ref(),
1210            self.inner.audit_principal.clone(),
1211            &args.query,
1212            args.limit,
1213        )
1214        .await
1215        .map_err(solo_to_mcp)?;
1216
1217        if result.hits.is_empty() {
1218            return Ok(CallToolResult::success(vec![Content::text(format!(
1219                "no matches (index has {} vectors)",
1220                result.index_len
1221            ))]));
1222        }
1223        let body = serde_json::to_string_pretty(&result.hits).unwrap_or_else(|_| String::new());
1224        Ok(CallToolResult::success(vec![Content::text(body)]))
1225    }
1226
1227    async fn handle_forget(
1228        &self,
1229        args: ForgetArgs,
1230    ) -> std::result::Result<CallToolResult, McpError> {
1231        let mid = MemoryId::from_str(&args.memory_id).map_err(|e| {
1232            McpError::invalid_params(format!("invalid memory_id: {e}"), None)
1233        })?;
1234        self.inner
1235            .tenant
1236            .write()
1237            .forget_as(self.inner.audit_principal.clone(), mid, args.reason)
1238            .await
1239            .map_err(solo_to_mcp)?;
1240        Ok(CallToolResult::success(vec![Content::text(format!(
1241            "forgotten {mid}"
1242        ))]))
1243    }
1244
1245    async fn handle_inspect(
1246        &self,
1247        args: InspectArgs,
1248    ) -> std::result::Result<CallToolResult, McpError> {
1249        let mid = MemoryId::from_str(&args.memory_id).map_err(|e| {
1250            McpError::invalid_params(format!("invalid memory_id: {e}"), None)
1251        })?;
1252        // Pipeline lives in solo-query::inspect; transports just format.
1253        let row = solo_query::inspect_one(
1254            self.inner.tenant.read(),
1255            self.inner.tenant.audit(),
1256            self.inner.audit_principal.clone(),
1257            mid,
1258        )
1259        .await
1260        .map_err(solo_to_mcp)?;
1261        let body = serde_json::to_string_pretty(&row).unwrap_or_else(|_| String::new());
1262        Ok(CallToolResult::success(vec![Content::text(body)]))
1263    }
1264
1265    // Path 1 derived-layer handlers (v0.4.0+). Each one delegates to a
1266    // single solo-query::derived pipeline and serialises the result Vec
1267    // to pretty JSON for the MCP wire. Empty result → JSON empty array
1268    // `[]` (not a special-case "no matches" string) so MCP clients can
1269    // parse uniformly.
1270
1271    async fn handle_themes(
1272        &self,
1273        args: ThemesArgs,
1274    ) -> std::result::Result<CallToolResult, McpError> {
1275        let hits = solo_query::themes(
1276            self.inner.tenant.read(),
1277            self.inner.tenant.audit(),
1278            self.inner.audit_principal.clone(),
1279            args.window_days,
1280            args.limit,
1281        )
1282        .await
1283        .map_err(solo_to_mcp)?;
1284        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1285        Ok(CallToolResult::success(vec![Content::text(body)]))
1286    }
1287
1288    async fn handle_facts_about(
1289        &self,
1290        args: FactsAboutArgs,
1291    ) -> std::result::Result<CallToolResult, McpError> {
1292        if args.subject.trim().is_empty() {
1293            return Err(McpError::invalid_params(
1294                "memory_facts_about: subject must not be empty".to_string(),
1295                None,
1296            ));
1297        }
1298        let hits = solo_query::facts_about(
1299            self.inner.tenant.read(),
1300            self.inner.tenant.audit(),
1301            self.inner.audit_principal.clone(),
1302            &args.subject,
1303            &self.inner.user_aliases,
1304            args.include_as_object,
1305            args.predicate.as_deref(),
1306            args.since_ms,
1307            args.until_ms,
1308            args.limit,
1309        )
1310        .await
1311        .map_err(solo_to_mcp)?;
1312        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1313        Ok(CallToolResult::success(vec![Content::text(body)]))
1314    }
1315
1316    async fn handle_contradictions(
1317        &self,
1318        args: ContradictionsArgs,
1319    ) -> std::result::Result<CallToolResult, McpError> {
1320        let hits = solo_query::contradictions(
1321            self.inner.tenant.read(),
1322            self.inner.tenant.audit(),
1323            self.inner.audit_principal.clone(),
1324            args.limit,
1325        )
1326        .await
1327        .map_err(solo_to_mcp)?;
1328        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1329        Ok(CallToolResult::success(vec![Content::text(body)]))
1330    }
1331
1332    async fn handle_inspect_cluster(
1333        &self,
1334        args: InspectClusterArgs,
1335    ) -> std::result::Result<CallToolResult, McpError> {
1336        if args.cluster_id.trim().is_empty() {
1337            return Err(McpError::invalid_params(
1338                "memory_inspect_cluster: cluster_id must not be empty".to_string(),
1339                None,
1340            ));
1341        }
1342        // `solo_to_mcp` maps `Error::NotFound` → `invalid_params` for
1343        // MCP (the protocol does not have a separate "not found" error
1344        // shape; clients see the message verbatim, which includes the
1345        // cluster_id).
1346        let record = solo_query::inspect_cluster(
1347            self.inner.tenant.read(),
1348            self.inner.tenant.audit(),
1349            self.inner.audit_principal.clone(),
1350            &args.cluster_id,
1351            args.full_content,
1352        )
1353        .await
1354        .map_err(solo_to_mcp)?;
1355        let body = serde_json::to_string_pretty(&record).unwrap_or_else(|_| String::new());
1356        Ok(CallToolResult::success(vec![Content::text(body)]))
1357    }
1358
1359    // Document handlers (v0.7.0+). Each wraps the corresponding writer
1360    // / query API; the MCP wire shape is plain JSON serialisation of
1361    // the returned report / records.
1362
1363    async fn handle_ingest_document(
1364        &self,
1365        args: IngestDocumentArgs,
1366    ) -> std::result::Result<CallToolResult, McpError> {
1367        if args.path.trim().is_empty() {
1368            return Err(McpError::invalid_params(
1369                "memory_ingest_document: path must not be empty".to_string(),
1370                None,
1371            ));
1372        }
1373        let path = std::path::PathBuf::from(args.path);
1374        // Defaults match what the daemon uses today (target 500 tokens,
1375        // 50-token overlap). Future: thread a per-call override through
1376        // the args struct if a use case appears.
1377        let chunk_config = solo_storage::document::ChunkConfig::default();
1378        let report = self
1379            .inner
1380            .tenant
1381            .write()
1382            .ingest_document_as(self.inner.audit_principal.clone(), path, chunk_config)
1383            .await
1384            .map_err(solo_to_mcp)?;
1385        let body = serde_json::to_string_pretty(&report).unwrap_or_else(|_| String::new());
1386        Ok(CallToolResult::success(vec![Content::text(body)]))
1387    }
1388
1389    async fn handle_search_docs(
1390        &self,
1391        args: SearchDocsArgs,
1392    ) -> std::result::Result<CallToolResult, McpError> {
1393        // `solo_query::run_doc_search` validates empty queries (returns
1394        // InvalidInput → invalid_params via solo_to_mcp) and clamps
1395        // limit upstream of the embedder call.
1396        let hits = solo_query::run_doc_search(
1397            self.inner.tenant.as_ref(),
1398            self.inner.audit_principal.clone(),
1399            &args.query,
1400            args.limit,
1401        )
1402        .await
1403        .map_err(solo_to_mcp)?;
1404        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1405        Ok(CallToolResult::success(vec![Content::text(body)]))
1406    }
1407
1408    async fn handle_inspect_document(
1409        &self,
1410        args: InspectDocumentArgs,
1411    ) -> std::result::Result<CallToolResult, McpError> {
1412        let doc_id = DocumentId::from_str(&args.doc_id).map_err(|e| {
1413            McpError::invalid_params(format!("invalid doc_id: {e}"), None)
1414        })?;
1415        let result_opt = solo_query::inspect_document(
1416            self.inner.tenant.read(),
1417            self.inner.tenant.audit(),
1418            self.inner.audit_principal.clone(),
1419            &doc_id,
1420        )
1421        .await
1422        .map_err(solo_to_mcp)?;
1423        match result_opt {
1424            Some(record) => {
1425                let body =
1426                    serde_json::to_string_pretty(&record).unwrap_or_else(|_| String::new());
1427                Ok(CallToolResult::success(vec![Content::text(body)]))
1428            }
1429            None => Err(McpError::invalid_params(
1430                format!("document {doc_id} not found"),
1431                None,
1432            )),
1433        }
1434    }
1435
1436    async fn handle_list_documents(
1437        &self,
1438        args: ListDocumentsArgs,
1439    ) -> std::result::Result<CallToolResult, McpError> {
1440        let rows = solo_query::list_documents(
1441            self.inner.tenant.read(),
1442            self.inner.tenant.audit(),
1443            self.inner.audit_principal.clone(),
1444            args.limit,
1445            args.offset,
1446            args.include_forgotten,
1447        )
1448        .await
1449        .map_err(solo_to_mcp)?;
1450        let body = serde_json::to_string_pretty(&rows).unwrap_or_else(|_| String::new());
1451        Ok(CallToolResult::success(vec![Content::text(body)]))
1452    }
1453
1454    async fn handle_forget_document(
1455        &self,
1456        args: ForgetDocumentArgs,
1457    ) -> std::result::Result<CallToolResult, McpError> {
1458        let doc_id = DocumentId::from_str(&args.doc_id).map_err(|e| {
1459            McpError::invalid_params(format!("invalid doc_id: {e}"), None)
1460        })?;
1461        let report = self
1462            .inner
1463            .tenant
1464            .write()
1465            .forget_document_as(self.inner.audit_principal.clone(), doc_id)
1466            .await
1467            .map_err(solo_to_mcp)?;
1468        let body = serde_json::to_string_pretty(&report).unwrap_or_else(|_| String::new());
1469        Ok(CallToolResult::success(vec![Content::text(body)]))
1470    }
1471}
1472
1473#[cfg(test)]
1474mod dispatch_tests {
1475    //! In-process integration tests for the MCP tool surface. We invoke
1476    //! `SoloMcpServer::dispatch_tool` directly (bypasses the rmcp
1477    //! protocol framing + `RequestContext`, which requires a `Peer`
1478    //! that's not constructible outside rmcp internals). The server is
1479    //! constructed against a real WriterActor + ReaderPool +
1480    //! StubEmbedder + StubVectorIndex from `solo_storage::test_support`.
1481    //!
1482    //! Tests live inline in this module rather than `tests/` because an
1483    //! external integration-test exe in `target/debug/deps/mcp_dispatch-*`
1484    //! tripped Windows UAC ERROR_ELEVATION_REQUIRED on the dev machine.
1485    //! The lib test binary doesn't have that issue.
1486    use super::*;
1487    use serde_json::json;
1488    use solo_core::VectorIndex;
1489    use solo_storage::test_support::StubVectorIndex;
1490    use solo_storage::{
1491        EmbedderConfig, IdentityConfig, KeyMaterial, ReaderPool, SoloConfig,
1492        StubEmbedder, TenantHandle, TenantRegistry, WriterActor, WriterSpawn,
1493    };
1494    use std::sync::Arc as StdArc;
1495
1496    fn fake_config(dim: u32) -> SoloConfig {
1497        SoloConfig {
1498            schema_version: 1,
1499            salt_hex: "00000000000000000000000000000000".to_string(),
1500            embedder: EmbedderConfig {
1501                name: "stub".to_string(),
1502                version: "v1".to_string(),
1503                dim,
1504                dtype: "f32".to_string(),
1505            },
1506            identity: IdentityConfig::default(),
1507            documents: solo_storage::DocumentConfig::default(),
1508            auth: None,
1509            audit: solo_storage::AuditSettings::default(),
1510            redaction: solo_storage::RedactionConfig::default(),
1511            llm: None,
1512            triples: solo_storage::TriplesConfig::default(),
1513            sampling: solo_storage::SamplingConfig::default(),
1514        }
1515    }
1516
1517    struct Harness {
1518        server: SoloMcpServer,
1519        _tmp: tempfile::TempDir,
1520        write_handle_extra: Option<solo_storage::WriteHandle>,
1521        join: Option<std::thread::JoinHandle<()>>,
1522    }
1523
1524    impl Harness {
1525        fn new(runtime: &tokio::runtime::Runtime) -> Self {
1526            let tmp = tempfile::TempDir::new().unwrap();
1527            let dim = 16usize;
1528            let hnsw: StdArc<dyn VectorIndex + Send + Sync> = StdArc::new(StubVectorIndex::new(dim));
1529            let embedder: StdArc<dyn solo_core::Embedder> = StdArc::new(StubEmbedder::new("stub", "v1", dim));
1530
1531            let conn = solo_storage::test_support::open_test_db_at(&tmp.path().join("test.db"));
1532            let WriterSpawn { handle, join } = WriterActor::spawn(conn, hnsw.clone());
1533
1534            // ReaderPool's deadpool::Pool needs a live tokio runtime for
1535            // both build + drop; build inside block_on.
1536            let path = tmp.path().join("test.db");
1537            let pool: ReaderPool =
1538                runtime.block_on(async { ReaderPool::new(&path, None, hnsw.clone()).unwrap() });
1539
1540            let tenant_id = solo_core::TenantId::default_tenant();
1541            let tenant_handle = StdArc::new(
1542                TenantHandle::from_parts_for_tests(
1543                    tenant_id.clone(),
1544                    fake_config(dim as u32),
1545                    path.clone(),
1546                    tmp.path().to_path_buf(),
1547                    0, // embedder_id; tests using full embedder_id path build their own
1548                    hnsw,
1549                    embedder.clone(),
1550                    handle.clone(),
1551                    std::thread::spawn(|| {}),
1552                    pool,
1553                ),
1554            );
1555            let key = KeyMaterial::from_bytes_for_tests([0u8; 32]);
1556            let registry = StdArc::new(TenantRegistry::for_tests_with_single_tenant(
1557                tmp.path().to_path_buf(),
1558                key,
1559                embedder,
1560                tenant_handle.clone(),
1561            ));
1562            let server = SoloMcpServer::new_for_tenant(registry, tenant_handle, Vec::new());
1563            Harness {
1564                server,
1565                _tmp: tmp,
1566                write_handle_extra: Some(handle),
1567                join: Some(join),
1568            }
1569        }
1570
1571        fn shutdown(mut self, runtime: &tokio::runtime::Runtime) {
1572            // The whole shutdown runs inside block_on so deadpool-sqlite's
1573            // drop (which schedules cleanup on the active runtime) sees a
1574            // live reactor. Without this, dropping the SoloMcpServer
1575            // (which holds the ReaderPool through its Arc<Inner>) panics
1576            // with "no reactor running".
1577            let join = self.join.take();
1578            let extra = self.write_handle_extra.take();
1579            runtime.block_on(async move {
1580                drop(extra);
1581                drop(self.server);
1582                drop(self._tmp);
1583                if let Some(join) = join {
1584                    let (tx, rx) = std::sync::mpsc::channel();
1585                    std::thread::spawn(move || {
1586                        let _ = tx.send(join.join());
1587                    });
1588                    tokio::task::spawn_blocking(move || {
1589                        rx.recv_timeout(std::time::Duration::from_secs(5))
1590                    })
1591                    .await
1592                    .expect("blocking task")
1593                    .expect("writer thread did not exit within 5s")
1594                    .expect("writer thread panicked");
1595                }
1596            });
1597        }
1598    }
1599
1600    fn rt() -> tokio::runtime::Runtime {
1601        tokio::runtime::Builder::new_multi_thread()
1602            .worker_threads(2)
1603            .enable_all()
1604            .build()
1605            .unwrap()
1606    }
1607
1608    /// Pull the first Content::text body out of a CallToolResult. Use
1609    /// serde_json roundtrip as a robust extractor — `Content`'s public
1610    /// API doesn't directly expose the inner text without going through
1611    /// pattern-matching on RawContent.
1612    fn first_text(r: &rmcp::model::CallToolResult) -> String {
1613        let first = r.content.first().expect("at least one content item");
1614        let v = serde_json::to_value(first).expect("content serialises");
1615        v.get("text")
1616            .and_then(|t| t.as_str())
1617            .map(|s| s.to_string())
1618            .unwrap_or_else(|| format!("{v}"))
1619    }
1620
/// The server must list exactly the thirteen canonical tools, in
/// this order, and each must carry a non-empty description.
#[test]
fn tools_list_returns_thirteen_canonical_tools() {
    const EXPECTED: [&str; 13] = [
        "memory_remember",
        "memory_recall",
        "memory_forget",
        "memory_inspect",
        // Derived-layer tools added in v0.4.0:
        "memory_themes",
        "memory_facts_about",
        "memory_contradictions",
        // Added in v0.5.0 (Priority 3):
        "memory_inspect_cluster",
        // Document tools added in v0.7.0:
        "memory_ingest_document",
        "memory_search_docs",
        "memory_inspect_document",
        "memory_list_documents",
        "memory_forget_document",
    ];

    let runtime = rt();
    let h = Harness::new(&runtime);
    let tools = h.server.dispatch_list_tools();

    let names: Vec<&str> = tools.iter().map(|tool| tool.name.as_ref()).collect();
    assert_eq!(names, EXPECTED.to_vec());

    for tool in tools.iter() {
        // rmcp 1.x: Tool.description is Option<Cow<'static, str>>.
        let desc = tool.description.as_deref().unwrap_or("");
        assert!(!desc.is_empty(), "{} description empty", tool.name);
        // The schema is only rendered here, not inspected. `required`
        // is intentionally absent on memory_themes +
        // memory_contradictions + memory_list_documents (all args
        // optional with defaults), while memory_facts_about has
        // required = ["subject"], so no per-tool `required` shape is
        // asserted.
        let _schema = tool.schema_as_json_value();
    }
    h.shutdown(&runtime);
}
1662
/// With nothing stored, `memory_themes` must answer with an empty
/// JSON array.
#[test]
fn themes_returns_json_array_on_empty_db() {
    let runtime = rt();
    let h = Harness::new(&runtime);

    let text = runtime.block_on(async {
        let reply = h
            .server
            .dispatch_tool("memory_themes", json!({}))
            .await
            .expect("themes succeeds");
        first_text(&reply)
    });

    // Empty derived layer → empty array JSON. Parses cleanly.
    let parsed: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
    assert!(parsed.is_array(), "expected array, got: {text}");
    assert_eq!(parsed.as_array().unwrap().len(), 0);

    h.shutdown(&runtime);
}
1682
/// `memory_themes` must accept its optional integer arguments
/// (`window_days`, `limit`) and still return a JSON array.
#[test]
fn themes_passes_through_window_and_limit_args() {
    let runtime = rt();
    let h = Harness::new(&runtime);

    // Should not crash with optional + integer args present.
    let args = json!({ "window_days": 7, "limit": 20 });
    let text = runtime.block_on(async {
        let reply = h
            .server
            .dispatch_tool("memory_themes", args)
            .await
            .expect("themes with args succeeds");
        first_text(&reply)
    });

    let parsed: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
    assert!(parsed.is_array());

    h.shutdown(&runtime);
}
1704
/// A whitespace-only `subject` must make `memory_facts_about` fail.
#[test]
fn facts_about_rejects_empty_subject() {
    let runtime = rt();
    let h = Harness::new(&runtime);

    let err = runtime.block_on(async {
        h.server
            .dispatch_tool("memory_facts_about", json!({ "subject": "   " }))
            .await
            .expect_err("empty subject must error")
    });

    // McpError doesn't expose a clean kind/message accessor; just
    // verify the error fires (validation path reached) by looking
    // for either keyword in its Debug rendering.
    let s = format!("{err:?}");
    let lowered = s.to_lowercase();
    let mentions_validation = ["subject", "invalid"]
        .iter()
        .any(|keyword| lowered.contains(*keyword));
    assert!(mentions_validation, "got: {s}");

    h.shutdown(&runtime);
}
1729
/// A subject with no stored facts yields an empty JSON array, not an
/// error.
#[test]
fn facts_about_returns_array_for_unknown_subject() {
    let runtime = rt();
    let h = Harness::new(&runtime);

    let text = runtime.block_on(async {
        let reply = h
            .server
            .dispatch_tool(
                "memory_facts_about",
                json!({ "subject": "NobodyKnowsThisSubject" }),
            )
            .await
            .expect("facts_about with unknown subject succeeds");
        first_text(&reply)
    });

    let parsed: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
    assert_eq!(parsed.as_array().unwrap().len(), 0);

    h.shutdown(&runtime);
}
1750
/// The v0.5.1 P8 `include_as_object` argument must be accepted when
/// present and optional when absent (serde default).
///
/// No triples are seeded: the point is only that the optional bool
/// flows through the dispatcher, so both calls succeed and return
/// an empty array. (Functional coverage of the object-position
/// widening lives in the query-crate tests.)
#[test]
fn facts_about_accepts_include_as_object_arg() {
    let runtime = rt();
    let h = Harness::new(&runtime);

    // Each case: the arguments to send and the message to panic with
    // if dispatch fails.
    let cases = vec![
        (
            json!({ "subject": "Maya", "include_as_object": true }),
            "dispatch with include_as_object=true succeeds",
        ),
        (
            // Omitted entirely — must default to false (no error).
            json!({ "subject": "Maya" }),
            "dispatch without include_as_object succeeds (default false)",
        ),
    ];

    runtime.block_on(async {
        for (args, why) in cases {
            let reply = h
                .server
                .dispatch_tool("memory_facts_about", args)
                .await
                .expect(why);
            let parsed: serde_json::Value =
                serde_json::from_str(&first_text(&reply)).expect("parses as json");
            assert_eq!(parsed.as_array().unwrap().len(), 0);
        }
    });

    h.shutdown(&runtime);
}
1791
/// With nothing stored, `memory_contradictions` must answer with an
/// empty JSON array.
#[test]
fn contradictions_returns_json_array_on_empty_db() {
    let runtime = rt();
    let h = Harness::new(&runtime);

    let text = runtime.block_on(async {
        let reply = h
            .server
            .dispatch_tool("memory_contradictions", json!({}))
            .await
            .expect("contradictions succeeds");
        first_text(&reply)
    });

    let parsed: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
    assert!(parsed.is_array());
    assert_eq!(parsed.as_array().unwrap().len(), 0);

    h.shutdown(&runtime);
}
1810
/// Content stored with `memory_remember` must come back from a
/// `memory_recall` using the same text as the query.
#[test]
fn remember_then_recall_round_trip() {
    const SENTENCE: &str = "the cat sat on the mat";

    let runtime = rt();
    let h = Harness::new(&runtime);

    // Borrow `h.server` rather than cloning it, so the only
    // outstanding reference at shutdown time is the harness's own.
    // A local clone held an Arc<Inner> with its own WriteHandle past
    // h.shutdown() and triggered the 5-second writer-thread timeout.
    runtime.block_on(async {
        let server = &h.server;

        let stored = server
            .dispatch_tool("memory_remember", json!({ "content": SENTENCE }))
            .await
            .expect("remember succeeds");
        let text = first_text(&stored);
        assert!(text.starts_with("remembered "), "got: {text}");

        let recalled = server
            .dispatch_tool("memory_recall", json!({ "query": SENTENCE, "limit": 5 }))
            .await
            .expect("recall succeeds");
        let text = first_text(&recalled);
        assert!(text.contains(SENTENCE), "got: {text}");
    });

    h.shutdown(&runtime);
}
1842
/// After `memory_forget`, the forgotten episode's content must no
/// longer appear in `memory_recall` output.
#[test]
fn forget_excludes_row_from_subsequent_recall() {
    let runtime = rt();
    let h = Harness::new(&runtime);

    runtime.block_on(async {
        let server = &h.server;

        // Store an episode and pull its id out of the acknowledgement
        // ("remembered <id>").
        let stored = server
            .dispatch_tool("memory_remember", json!({ "content": "to be forgotten" }))
            .await
            .unwrap();
        let ack = first_text(&stored);
        let memory_id = ack.strip_prefix("remembered ").unwrap().to_string();

        server
            .dispatch_tool(
                "memory_forget",
                json!({ "memory_id": memory_id, "reason": "test" }),
            )
            .await
            .expect("forget succeeds");

        let recalled = server
            .dispatch_tool(
                "memory_recall",
                json!({ "query": "to be forgotten", "limit": 5 }),
            )
            .await
            .unwrap();
        let text = first_text(&recalled);
        let still_listed = text.contains(r#""content": "to be forgotten""#);
        assert!(
            !still_listed,
            "forgotten row should be excluded; got: {text}"
        );
    });

    h.shutdown(&runtime);
}
1881
/// `memory_remember` with empty content must fail with a
/// "must not be empty" error.
#[test]
fn empty_remember_returns_invalid_params() {
    let runtime = rt();
    let h = Harness::new(&runtime);

    let err = runtime.block_on(async {
        h.server
            .dispatch_tool("memory_remember", json!({ "content": "" }))
            .await
            .unwrap_err()
    });
    let rendered = format!("{err:?}");
    assert!(rendered.contains("must not be empty"));

    h.shutdown(&runtime);
}
1896
/// `memory_recall` with a whitespace-only query must fail with a
/// "must not be empty" error.
#[test]
fn empty_recall_query_returns_invalid_params() {
    let runtime = rt();
    let h = Harness::new(&runtime);

    let err = runtime.block_on(async {
        h.server
            .dispatch_tool("memory_recall", json!({ "query": "   " }))
            .await
            .unwrap_err()
    });
    let rendered = format!("{err:?}");
    assert!(rendered.contains("must not be empty"));

    h.shutdown(&runtime);
}
1911
/// `memory_inspect` with an unparseable id must fail with an
/// "invalid memory_id" error.
#[test]
fn inspect_with_invalid_id_returns_invalid_params() {
    let runtime = rt();
    let h = Harness::new(&runtime);

    let err = runtime.block_on(async {
        h.server
            .dispatch_tool("memory_inspect", json!({ "memory_id": "not-a-uuid" }))
            .await
            .unwrap_err()
    });
    let rendered = format!("{err:?}");
    assert!(rendered.contains("invalid memory_id"));

    h.shutdown(&runtime);
}
1926
/// `memory_forget` on a well-formed id that matches no episode must
/// fail with a "not found" error.
#[test]
fn forget_unknown_id_returns_invalid_params() {
    let runtime = rt();
    let h = Harness::new(&runtime);

    // Valid UUID format but not in episodes — handle_forget surfaces
    // NotFound, mapped to invalid_params per solo_to_mcp.
    let args = json!({ "memory_id": "00000000-0000-7000-8000-000000000000" });
    let err = runtime.block_on(async {
        h.server
            .dispatch_tool("memory_forget", args)
            .await
            .unwrap_err()
    });
    let rendered = format!("{err:?}");
    assert!(rendered.contains("not found"));

    h.shutdown(&runtime);
}
1947
/// Dispatching a name that is not a registered tool must fail with
/// an "unknown tool" error.
#[test]
fn unknown_tool_name_returns_invalid_params() {
    let runtime = rt();
    let h = Harness::new(&runtime);

    let err = runtime.block_on(async {
        h.server
            .dispatch_tool("memory.summon", json!({}))
            .await
            .unwrap_err()
    });
    let rendered = format!("{err:?}");
    assert!(rendered.contains("unknown tool"));

    h.shutdown(&runtime);
}
1962
/// Regression guard for the v0.4.1 MCP tool-name fix, widened in
/// v0.5.0 Priority 4 from Anthropic alone to **all three** major
/// LLM providers.
///
/// Every provider applies its own regex to tool names on the
/// function-calling wire, so a portable name has to satisfy each of
/// them:
///
///   - **Anthropic**: `^[a-zA-Z0-9_-]{1,64}$`. This is the rule
///     v0.4.1 ran into; violating it rejects the entire toolset on
///     Claude Desktop / Cursor / Claude Code with
///     `FrontendRemoteMcpToolDefinition.name: String should match
///     pattern ...`.
///   - **OpenAI** function-calling: `^[a-zA-Z_][a-zA-Z0-9_-]*$`,
///     length ≤ 64 (first character is a letter or underscore).
///   - **Gemini** function-calling: documented as a-z, A-Z, 0-9,
///     underscores and dashes, with some sources also allowing
///     dots. Because sources disagree, this test applies the most
///     conservative reading — leading letter or underscore, then
///     alphanumerics and underscores only (no hyphen, no dot),
///     length ≤ 63. That is the strictest of the three rules, so a
///     name that passes it passes the other two, and it protects
///     against a provider tightening its regex later (the failure
///     mode v0.4.1 hit on Anthropic). See
///     <https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/docs/api/google/generativeai/protos/FunctionDeclaration.md>
///     and <https://ai.google.dev/gemini-api/docs/function-calling>.
///
/// Lesson banked v0.3 #8: rmcp framing tests accept dot-named tools
/// because rmcp's own client-side validation is permissive; only
/// the downstream provider API enforces the regex. Checking the
/// names at `cargo test` time means a future rename must clear all
/// three provider rules before it reaches real clients.
#[test]
fn tool_names_match_cross_provider_regex() {
    /// True when `name` is between 1 and `max` bytes long.
    fn length_ok(name: &str, max: usize) -> bool {
        (1..=max).contains(&name.len())
    }

    /// True when `name` begins with an ASCII letter or underscore.
    fn leading_ok(name: &str) -> bool {
        name.starts_with(|c: char| c.is_ascii_alphabetic() || c == '_')
    }

    /// True for ASCII alphanumerics and `_`, plus `-` when
    /// `allow_hyphen` is set.
    fn body_char_ok(c: char, allow_hyphen: bool) -> bool {
        c.is_ascii_alphanumeric() || c == '_' || (allow_hyphen && c == '-')
    }

    /// Anthropic API name regex: `^[a-zA-Z0-9_-]{1,64}$`.
    fn passes_anthropic(name: &str) -> bool {
        length_ok(name, 64) && name.chars().all(|c| body_char_ok(c, true))
    }

    /// OpenAI function-calling name regex:
    /// `^[a-zA-Z_][a-zA-Z0-9_-]*$`, length ≤ 64.
    fn passes_openai(name: &str) -> bool {
        length_ok(name, 64)
            && leading_ok(name)
            && name.chars().skip(1).all(|c| body_char_ok(c, true))
    }

    /// Gemini function-calling name regex (conservative reading):
    /// `^[a-zA-Z_][a-zA-Z0-9_]*$`, length ≤ 63. No hyphen, no dot.
    fn passes_gemini(name: &str) -> bool {
        length_ok(name, 63)
            && leading_ok(name)
            && name.chars().skip(1).all(|c| body_char_ok(c, false))
    }

    let tools = build_tools();
    assert_eq!(
        tools.len(),
        13,
        "expected 13 tools in v0.7.0 (8 v0.5.x + 5 document tools)"
    );

    // Sanity-check that tool_names() agrees with build_tools().
    let built_names: Vec<String> = tools.iter().map(|tool| tool.name.to_string()).collect();
    let public_names: Vec<String> = super::tool_names()
        .iter()
        .map(|name| name.to_string())
        .collect();
    assert_eq!(
        built_names, public_names,
        "tool_names() drifted from build_tools() — keep them in sync"
    );

    for tool in tools {
        let name: &str = &tool.name;
        assert!(
            passes_anthropic(name),
            "tool name {:?} fails Anthropic regex \
             ^[a-zA-Z0-9_-]{{1,64}}$ — see v0.3 lesson #8",
            name
        );
        assert!(
            passes_openai(name),
            "tool name {:?} fails OpenAI function-calling regex \
             ^[a-zA-Z_][a-zA-Z0-9_-]*$ (len ≤ 64)",
            name
        );
        assert!(
            passes_gemini(name),
            "tool name {:?} fails Gemini function-calling regex \
             ^[a-zA-Z_][a-zA-Z0-9_]*$ (len ≤ 63, strict)",
            name
        );
    }
}
2084
/// Regression guard for the v0.5.0 Priority 4 jargon pass.
///
/// Tool descriptions and `get_info().instructions` are the first
/// (and often only) text a calling LLM reads when its tool-search
/// mechanism decides whether Solo's tools are relevant. Earlier
/// descriptions used Solo-internal vocabulary (`SPO`, `Steward`,
/// `LEFT JOIN`, `candidate pair`, `tagged_with`), which does not
/// pattern-match natural-language agent queries such as "what do
/// you know about Alex?" — the load-bearing v0.5.0 finding from the
/// 2026-05-14 thesis-test in Claude Desktop.
///
/// The test forbids that vocabulary in any user-facing text, so a
/// contributor who reaches for jargon has to choose plain-English
/// phrasing instead.
///
/// Matching is a case-insensitive *substring* test, so a short term
/// such as `SPO` also matches inside ordinary words like
/// "response".
#[test]
fn tool_descriptions_avoid_internal_jargon() {
    // Drawn from the pre-Priority-4 descriptions; expand only if a
    // new term creeps in.
    const FORBIDDEN: &[&str] = &[
        "SPO",
        "Steward",
        "Steward-flagged",
        "LEFT JOIN",
        "candidate pair",
        "candidate_pair",
        "tagged_with",
    ];

    /// First entry of `FORBIDDEN` (in list order) that occurs in
    /// `text`, ignoring case; `None` when the text is clean.
    fn first_forbidden_term(text: &str) -> Option<&'static str> {
        let lowered = text.to_lowercase();
        FORBIDDEN
            .iter()
            .copied()
            .find(|term| lowered.contains(&term.to_lowercase()))
    }

    // 1. Each tool description.
    for tool in build_tools() {
        let desc = tool.description.as_deref().unwrap_or("");
        if let Some(term) = first_forbidden_term(desc) {
            panic!(
                "tool {:?} description contains forbidden jargon \
                 {:?} — rewrite in plain English (see v0.5.0 \
                 Priority 4)",
                tool.name, term,
            );
        }
    }

    // 2. The server-level instructions (what tool-search sees
    // first).
    let server_info = harness_server_info();
    let instructions = server_info
        .instructions
        .as_deref()
        .expect("get_info() must set instructions");
    if let Some(term) = first_forbidden_term(instructions) {
        panic!(
            "get_info().instructions contains forbidden jargon \
             {:?} — rewrite in plain English",
            term,
        );
    }
}
2151
2152    /// Build a `ServerInfo` for the jargon test without spinning up
2153    /// the full harness (which needs tokio + tempdir). The
2154    /// `ServerHandler::get_info()` method doesn't take `&self` state
2155    /// in any meaningful way for our impl — it returns a static
2156    /// `ServerInfo` literal — so we construct a minimal-input server
2157    /// just to call it.
2158    fn harness_server_info() -> rmcp::model::ServerInfo {
2159        let runtime = rt();
2160        let h = Harness::new(&runtime);
2161        let info = ServerHandler::get_info(&h.server);
2162        h.shutdown(&runtime);
2163        info
2164    }
2165
/// Regression guard for the MCP `serverInfo` identity bug fixed in
/// v0.9.1 (P1 Fix 1).
///
/// In v0.9.0, P0a's rmcp 0.1.5 → 1.7 bump swapped the explicit
/// `Implementation::new("solo", "<version>")` constructor for
/// `Implementation::from_build_env()`. That helper reads
/// `CARGO_PKG_NAME` + `CARGO_PKG_VERSION` from **rmcp's own** build
/// environment (the proc-macro expansion captures rmcp's
/// `Cargo.toml`, not the consumer's), so every Solo MCP daemon on
/// v0.9.0 announced itself as `{name: "rmcp", version: "1.7.0"}`
/// rather than `{name: "solo", version: "<workspace.version>"}`.
///
/// What is pinned:
///   - `name == "solo"` — the operator-facing binary name, not
///     `"solo-api"` (which is what `env!("CARGO_PKG_NAME")` would
///     give for this crate's manifest);
///   - `version == env!("CARGO_PKG_VERSION")` as seen by solo-api's
///     own compilation. That is the workspace.package version via
///     inheritance, so it stays in sync with `solo --version` and
///     `solo-cli`'s identity.
#[test]
fn server_info_identity_is_solo_not_rmcp_or_solo_api() {
    let info = harness_server_info();
    let identity = &info.server_info;
    let reported_name = identity.name.as_str();
    let reported_version = identity.version.as_str();

    assert_eq!(
        reported_name,
        "solo",
        "MCP serverInfo.name must be \"solo\" (not \"rmcp\" or \
         \"solo-api\"). got name={name:?} version={version:?}",
        name = reported_name,
        version = reported_version,
    );
    assert_eq!(
        reported_version,
        env!("CARGO_PKG_VERSION"),
        "MCP serverInfo.version must match solo-api's compile-time \
         CARGO_PKG_VERSION (i.e. the workspace.package version); \
         a mismatch means we regressed back to rmcp's build env. \
         got version={version:?}",
        version = reported_version,
    );
}
2205
2206    // ---- memory_inspect_cluster (v0.5.0 Priority 3) ----
2207
2208    #[test]
2209    fn inspect_cluster_unknown_id_returns_invalid_params() {
2210        // NotFound from solo_query::inspect_cluster is mapped through
2211        // `solo_to_mcp` to `invalid_params` (MCP has no separate
2212        // not-found error shape). Error message should name the id.
2213        let runtime = rt();
2214        let h = Harness::new(&runtime);
2215        runtime.block_on(async {
2216            let err = h
2217                .server
2218                .dispatch_tool(
2219                    "memory_inspect_cluster",
2220                    json!({ "cluster_id": "no-such-cluster" }),
2221                )
2222                .await
2223                .expect_err("unknown cluster must error");
2224            let s = format!("{err:?}");
2225            assert!(
2226                s.contains("no-such-cluster") || s.to_lowercase().contains("not found"),
2227                "expected error to mention the missing cluster id; got: {s}"
2228            );
2229        });
2230        h.shutdown(&runtime);
2231    }
2232
2233    #[test]
2234    fn inspect_cluster_rejects_empty_id() {
2235        let runtime = rt();
2236        let h = Harness::new(&runtime);
2237        runtime.block_on(async {
2238            let err = h
2239                .server
2240                .dispatch_tool(
2241                    "memory_inspect_cluster",
2242                    json!({ "cluster_id": "   " }),
2243                )
2244                .await
2245                .expect_err("blank cluster_id must error");
2246            let s = format!("{err:?}");
2247            assert!(
2248                s.to_lowercase().contains("cluster_id")
2249                    || s.to_lowercase().contains("must not be empty"),
2250                "got: {s}"
2251            );
2252        });
2253        h.shutdown(&runtime);
2254    }
2255
2256    // ---- Document tools (v0.7.0 P5) ----
2257    //
2258    // The five document handlers each have two arg-shape tests:
2259    //   - arg-struct parses from JSON (serde round-trip; defaults work).
2260    //   - dispatch arm routes to the handler (we observe behaviour via
2261    //     a known empty-DB response — bad routing surfaces as
2262    //     "unknown tool" or wrong shape).
2263    //
2264    // Functional coverage (ingest → search → inspect → forget) lives in
2265    // `crates/solo-cli/tests/mcp_smoke.rs` where a real subprocess + real
2266    // writer-with-embedder is wired up. The in-process Harness here uses
2267    // `WriterActor::spawn` which doesn't carry an embedder, so ingest /
    // search themselves return an error — but the dispatch + arg-parse
    // paths are still exercised correctly.
2270
2271    #[test]
2272    fn ingest_document_args_parse_with_required_path() {
2273        let v: IngestDocumentArgs =
2274            serde_json::from_value(json!({ "path": "/tmp/notes.md" })).expect("parses");
2275        assert_eq!(v.path, "/tmp/notes.md");
2276        // path is required — missing must reject at deserialization.
2277        let err = serde_json::from_value::<IngestDocumentArgs>(json!({})).unwrap_err();
2278        assert!(format!("{err}").contains("path"));
2279    }
2280
2281    #[test]
2282    fn search_docs_args_parse_with_default_limit() {
2283        let v: SearchDocsArgs =
2284            serde_json::from_value(json!({ "query": "backups" })).expect("parses");
2285        assert_eq!(v.query, "backups");
2286        assert_eq!(v.limit, 5, "default limit must be 5");
2287        let v: SearchDocsArgs =
2288            serde_json::from_value(json!({ "query": "backups", "limit": 20 })).expect("parses");
2289        assert_eq!(v.limit, 20);
2290    }
2291
2292    #[test]
2293    fn inspect_document_args_parse_with_required_doc_id() {
2294        let v: InspectDocumentArgs =
2295            serde_json::from_value(json!({ "doc_id": "abc" })).expect("parses");
2296        assert_eq!(v.doc_id, "abc");
2297        let err = serde_json::from_value::<InspectDocumentArgs>(json!({})).unwrap_err();
2298        assert!(format!("{err}").contains("doc_id"));
2299    }
2300
2301    #[test]
2302    fn list_documents_args_parse_with_all_defaults() {
2303        let v: ListDocumentsArgs = serde_json::from_value(json!({})).expect("parses");
2304        assert_eq!(v.limit, 20, "default limit must be 20");
2305        assert_eq!(v.offset, 0, "default offset must be 0");
2306        assert!(!v.include_forgotten, "default include_forgotten must be false");
2307        let v: ListDocumentsArgs = serde_json::from_value(
2308            json!({ "limit": 5, "offset": 10, "include_forgotten": true }),
2309        )
2310        .expect("parses");
2311        assert_eq!(v.limit, 5);
2312        assert_eq!(v.offset, 10);
2313        assert!(v.include_forgotten);
2314    }
2315
2316    #[test]
2317    fn forget_document_args_parse_with_required_doc_id() {
2318        let v: ForgetDocumentArgs =
2319            serde_json::from_value(json!({ "doc_id": "abc" })).expect("parses");
2320        assert_eq!(v.doc_id, "abc");
2321        let err = serde_json::from_value::<ForgetDocumentArgs>(json!({})).unwrap_err();
2322        assert!(format!("{err}").contains("doc_id"));
2323    }
2324
2325    #[test]
2326    fn ingest_document_rejects_empty_path() {
2327        // Reaches the dispatch arm → handle_ingest_document → empty
2328        // guard fires before the writer is touched. Proves routing.
2329        let runtime = rt();
2330        let h = Harness::new(&runtime);
2331        runtime.block_on(async {
2332            let err = h
2333                .server
2334                .dispatch_tool("memory_ingest_document", json!({ "path": "" }))
2335                .await
2336                .expect_err("empty path must error");
2337            let s = format!("{err:?}");
2338            assert!(
2339                s.to_lowercase().contains("path")
2340                    || s.to_lowercase().contains("must not be empty"),
2341                "got: {s}"
2342            );
2343        });
2344        h.shutdown(&runtime);
2345    }
2346
2347    #[test]
2348    fn search_docs_rejects_empty_query() {
2349        // Empty query trips solo_query::run_doc_search's validation
2350        // → InvalidInput → invalid_params.
2351        let runtime = rt();
2352        let h = Harness::new(&runtime);
2353        runtime.block_on(async {
2354            let err = h
2355                .server
2356                .dispatch_tool("memory_search_docs", json!({ "query": "   " }))
2357                .await
2358                .expect_err("empty query must error");
2359            let s = format!("{err:?}");
2360            assert!(
2361                s.to_lowercase().contains("must not be empty")
2362                    || s.to_lowercase().contains("invalid"),
2363                "got: {s}"
2364            );
2365        });
2366        h.shutdown(&runtime);
2367    }
2368
2369    #[test]
2370    fn inspect_document_unknown_id_returns_invalid_params() {
2371        // Valid UUID format but no row exists → handler returns
2372        // invalid_params with the missing id in the message.
2373        let runtime = rt();
2374        let h = Harness::new(&runtime);
2375        runtime.block_on(async {
2376            let err = h
2377                .server
2378                .dispatch_tool(
2379                    "memory_inspect_document",
2380                    json!({ "doc_id": "00000000-0000-7000-8000-000000000000" }),
2381                )
2382                .await
2383                .expect_err("unknown doc must error");
2384            let s = format!("{err:?}");
2385            assert!(
2386                s.to_lowercase().contains("not found"),
2387                "expected 'not found' message; got: {s}"
2388            );
2389        });
2390        h.shutdown(&runtime);
2391    }
2392
2393    #[test]
2394    fn inspect_document_rejects_malformed_id() {
2395        let runtime = rt();
2396        let h = Harness::new(&runtime);
2397        runtime.block_on(async {
2398            let err = h
2399                .server
2400                .dispatch_tool(
2401                    "memory_inspect_document",
2402                    json!({ "doc_id": "not-a-uuid" }),
2403                )
2404                .await
2405                .expect_err("malformed doc_id must error");
2406            let s = format!("{err:?}");
2407            assert!(s.contains("invalid doc_id"), "got: {s}");
2408        });
2409        h.shutdown(&runtime);
2410    }
2411
2412    #[test]
2413    fn list_documents_returns_empty_array_on_empty_db() {
2414        let runtime = rt();
2415        let h = Harness::new(&runtime);
2416        runtime.block_on(async {
2417            let r = h
2418                .server
2419                .dispatch_tool("memory_list_documents", json!({}))
2420                .await
2421                .expect("list succeeds");
2422            let text = first_text(&r);
2423            let v: serde_json::Value =
2424                serde_json::from_str(&text).expect("parses as json");
2425            assert!(v.is_array(), "expected array, got: {text}");
2426            assert_eq!(v.as_array().unwrap().len(), 0);
2427        });
2428        h.shutdown(&runtime);
2429    }
2430
2431    #[test]
2432    fn list_documents_passes_through_limit_offset_include_args() {
2433        let runtime = rt();
2434        let h = Harness::new(&runtime);
2435        runtime.block_on(async {
2436            let r = h
2437                .server
2438                .dispatch_tool(
2439                    "memory_list_documents",
2440                    json!({ "limit": 5, "offset": 10, "include_forgotten": true }),
2441                )
2442                .await
2443                .expect("list with args succeeds");
2444            let text = first_text(&r);
2445            let v: serde_json::Value =
2446                serde_json::from_str(&text).expect("parses as json");
2447            assert!(v.is_array());
2448        });
2449        h.shutdown(&runtime);
2450    }
2451
2452    #[test]
2453    fn forget_document_rejects_malformed_id() {
2454        let runtime = rt();
2455        let h = Harness::new(&runtime);
2456        runtime.block_on(async {
2457            let err = h
2458                .server
2459                .dispatch_tool(
2460                    "memory_forget_document",
2461                    json!({ "doc_id": "not-a-uuid" }),
2462                )
2463                .await
2464                .expect_err("malformed doc_id must error");
2465            let s = format!("{err:?}");
2466            assert!(s.contains("invalid doc_id"), "got: {s}");
2467        });
2468        h.shutdown(&runtime);
2469    }
2470}
2471
2472// ===========================================================================
2473// v0.8.1 P2: MCP audit principal extraction
2474// ===========================================================================
2475//
2476// These tests live in their own module because they manipulate the
2477// `SOLO_MCP_PRINCIPAL_TOKEN` env var, which is process-global mutable
2478// state. Serialised via a static `Mutex` so cargo test's multi-threaded
2479// runner doesn't race. Pattern mirrors the env-guard discipline in
2480// `solo_cli::commands::common::ollama_overrides_tests`.
2481
#[cfg(test)]
mod principal_extraction_tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard};

    /// Lock that every test in this module takes before touching
    /// `SOLO_MCP_PRINCIPAL_TOKEN`, so the cases never overlap.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Acquire [`ENV_LOCK`]. A poisoned lock (left behind by a test
    /// that panicked while holding it) is recovered via `into_inner`
    /// so one failure doesn't sink the rest of the suite.
    fn hold_env_lock() -> MutexGuard<'static, ()> {
        ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Drop guard that removes the env var again, so state never leaks
    /// into the next case — including when the owning test panics.
    struct EnvGuard;

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            // SAFETY: every caller holds ENV_LOCK across construct + drop.
            unsafe { std::env::remove_var(ENV_MCP_PRINCIPAL_TOKEN) };
        }
    }

    /// Set the principal env var to `val`; the returned guard unsets it
    /// when dropped.
    fn set_principal_env(val: &str) -> EnvGuard {
        // SAFETY: ENV_LOCK held by caller.
        unsafe { std::env::set_var(ENV_MCP_PRINCIPAL_TOKEN, val) };
        EnvGuard
    }

    /// Make sure the principal env var is absent; the returned guard
    /// unsets it once more when dropped.
    fn clear_principal_env() -> EnvGuard {
        // SAFETY: ENV_LOCK held by caller.
        unsafe { std::env::remove_var(ENV_MCP_PRINCIPAL_TOKEN) };
        EnvGuard
    }

    // NOTE: in every test `_lock` is declared before `_g`. Locals drop
    // in reverse declaration order, so the env var is cleaned up while
    // the lock is still held.

    /// Stdio path: with `SOLO_MCP_PRINCIPAL_TOKEN` set and no header,
    /// the env value becomes the principal.
    #[test]
    fn stdio_env_var_resolves_to_principal() {
        let _lock = hold_env_lock();
        let _g = set_principal_env("alice-token");
        assert_eq!(resolve_mcp_principal(None).as_deref(), Some("alice-token"));
    }

    /// Stdio path: no env var and no header ⇒ `None` (regression guard:
    /// users without auth must keep the v0.8.0 behaviour).
    #[test]
    fn stdio_no_env_var_resolves_to_none() {
        let _lock = hold_env_lock();
        let _g = clear_principal_env();
        assert_eq!(resolve_mcp_principal(None), None);
    }

    /// Stdio path: an env var holding only whitespace ⇒ `None`, so a
    /// launcher typo doesn't pin every audit row to a blank principal.
    #[test]
    fn stdio_whitespace_env_var_resolves_to_none() {
        let _lock = hold_env_lock();
        let _g = set_principal_env("   \t  ");
        assert_eq!(resolve_mcp_principal(None), None);
    }

    /// HTTP-MCP path: an `Authorization: Bearer <token>` header value
    /// yields `<token>` as the principal.
    #[test]
    fn http_header_resolves_to_bearer_token_principal() {
        let _lock = hold_env_lock();
        let _g = clear_principal_env();
        let header = Some("Bearer api-token-xyz");
        assert_eq!(resolve_mcp_principal(header).as_deref(), Some("api-token-xyz"));
    }

    /// Precedence: if the env var and the header both carry a token,
    /// the header wins (consistent with the rest of the auth stack —
    /// JWT claim beats `X-Solo-Tenant` header).
    #[test]
    fn http_header_beats_env_var() {
        let _lock = hold_env_lock();
        let _g = set_principal_env("env-token");
        let resolved = resolve_mcp_principal(Some("Bearer header-token"));
        assert_eq!(
            resolved.as_deref(),
            Some("header-token"),
            "header MUST win over env var per documented precedence"
        );
    }

    /// HTTP-MCP path: a header without the `Bearer ` prefix is ignored
    /// and resolution falls back to the env var.
    #[test]
    fn http_malformed_header_falls_through_to_env() {
        let _lock = hold_env_lock();
        let _g = set_principal_env("env-fallback");
        let header = Some("Basic dXNlcjpwYXNz");
        assert_eq!(resolve_mcp_principal(header).as_deref(), Some("env-fallback"));
    }

    /// HTTP-MCP path: `Bearer ` followed by nothing but whitespace is
    /// treated like the whitespace-only env var — a half-formed header
    /// gets no credit and resolution falls back to the env var.
    #[test]
    fn http_empty_bearer_header_falls_through_to_env() {
        let _lock = hold_env_lock();
        let _g = set_principal_env("env-fallback");
        let header = Some("Bearer   ");
        assert_eq!(resolve_mcp_principal(header).as_deref(), Some("env-fallback"));
    }

    /// Repeated calls under an unchanged env var keep resolving to the
    /// same principal (guards the "stable across N tool calls in one
    /// session" contract).
    #[test]
    fn stable_across_multiple_resolutions() {
        let _lock = hold_env_lock();
        let _g = set_principal_env("stable-token");
        (0..5).for_each(|_| {
            assert_eq!(
                resolve_mcp_principal(None).as_deref(),
                Some("stable-token")
            );
        });
    }
}
2607
/// Tests for the v0.9.0 P2 LLM-config gate that runs at MCP
/// initialize time.
///
/// Everything here calls the pure function [`initialize_decision`]
/// directly: no rmcp Peer is built (its constructors are private) and
/// no MCP handshake is driven. The glue between `initialize_decision`
/// and its side effects lives in [`SoloMcpServer::initialize`]; that
/// path is covered indirectly by the audit-row tests in
/// [`crate::llm::sampling::tests`], which exercise the same
/// `SamplingLlmClient` + `WriteCommand::EmitLlmSamplingAudit` path
/// that `populate_sampling_steward` constructs.
#[cfg(test)]
mod initialize_decision_tests {
    use super::*;
    use solo_storage::LlmSettings;

    /// No `[llm]` block at all → Allow, whatever the peer's sampling
    /// capability (matches v0.8.x behaviour).
    #[test]
    fn no_llm_block_allows_initialize_regardless_of_sampling_capability() {
        for peer_has_sampling in [false, true] {
            assert_eq!(
                initialize_decision(&None, peer_has_sampling),
                InitializeDecision::Allow
            );
        }
    }

    /// `[llm] mode = "none"` → Allow, with or without sampling.
    #[test]
    fn llm_none_allows_initialize_regardless_of_sampling_capability() {
        let settings = Some(LlmSettings::None);
        for peer_has_sampling in [false, true] {
            assert_eq!(
                initialize_decision(&settings, peer_has_sampling),
                InitializeDecision::Allow
            );
        }
    }

    /// `[llm] mode = "anthropic"` → Allow, with or without sampling.
    #[test]
    fn llm_anthropic_allows_initialize_regardless_of_sampling_capability() {
        let settings = Some(LlmSettings::Anthropic {
            api_key_env: "ANTHROPIC_API_KEY".into(),
            model: "claude-sonnet-4-6".into(),
        });
        for peer_has_sampling in [false, true] {
            assert_eq!(
                initialize_decision(&settings, peer_has_sampling),
                InitializeDecision::Allow
            );
        }
    }

    /// `[llm] mode = "ollama"` → Allow, with or without sampling.
    #[test]
    fn llm_ollama_allows_initialize_regardless_of_sampling_capability() {
        let settings = Some(LlmSettings::Ollama {
            base_url: "http://localhost:11434".into(),
            model: "qwen3-coder:30b".into(),
        });
        for peer_has_sampling in [false, true] {
            assert_eq!(
                initialize_decision(&settings, peer_has_sampling),
                InitializeDecision::Allow
            );
        }
    }

    /// `[llm] mode = "mcp_sampling"` and the peer advertises sampling →
    /// the decision is to populate the sampling-steward slot.
    #[test]
    fn llm_mcp_sampling_with_sampling_capability_populates_slot() {
        let settings = Some(LlmSettings::McpSampling);
        let decision = initialize_decision(&settings, true);
        assert_eq!(decision, InitializeDecision::PopulateSamplingSteward);
    }

    /// `[llm] mode = "mcp_sampling"` and the peer does NOT advertise
    /// sampling → initialize is rejected (the locked BLOCKER 2 error).
    #[test]
    fn llm_mcp_sampling_without_sampling_capability_rejects() {
        let settings = Some(LlmSettings::McpSampling);
        let decision = initialize_decision(&settings, false);
        assert_eq!(
            decision,
            InitializeDecision::RejectMissingSamplingCapability
        );
    }

    /// The locked BLOCKER 2 error message must keep carrying these
    /// exact fragments, so a future audit-revision can grep for them
    /// and confirm they still land.
    #[test]
    fn sampling_capability_missing_error_message_contains_all_alternatives() {
        let msg = sampling_capability_missing_error_message();
        let has = |fragment: &str| msg.contains(fragment);

        // Banner.
        assert!(has("LLM backend `mcp_sampling`"));
        // Alternative 1: Anthropic.
        assert!(has("mode = \"anthropic\""));
        assert!(has("api_key_env = \"ANTHROPIC_API_KEY\""));
        // Alternative 2: OpenAI.
        assert!(has("mode = \"openai\""));
        assert!(has("api_key_env = \"OPENAI_API_KEY\""));
        // Alternative 3: Ollama.
        assert!(has("mode = \"ollama\""));
        assert!(has("base_url = \"http://localhost:11434\""));
        // Alternative 4: no LLM.
        assert!(has("mode = \"none\""));
        // Footer pointer at the release-prep doc.
        assert!(has("docs/releases/v0.9.0.md"));
    }
}
2701
2702// fetch_recall_rows + RecallHit + RecallRow used to live here. Recall
2703// pipeline moved to solo_query::recall in commit (consolidate-recall);
2704// transports just call solo_query::run_recall and format the result.