Skip to main content

solo_api/
mcp.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! MCP (Model Context Protocol) server for Solo.
4//!
5//! Exposes eighteen tools to MCP clients (Claude Desktop, Cursor, etc.):
6//!
7//! Episode tools (v0.1+, with v0.9.2 additions):
8//!   - `memory_remember(content, source_type?, source_id?, salience?)` —
9//!     store an episode. Returns the new MemoryId. `salience` (v0.9.2+)
10//!     is optional in [0.0, 1.0] and defaults to 0.5.
11//!   - `memory_remember_batch(items)` (v0.9.2+) — atomically store N
12//!     episodes in one writer-actor transaction. Each item has the
13//!     same fields as `memory_remember`. Returns an ordered array of
14//!     MemoryIds; either all items persist or none do.
15//!   - `memory_recall(query, limit?)` — vector search. Returns the top-K
16//!     matches with content + tier + status.
17//!   - `memory_context(query, subject?, window_days?, limit?)` — combined
18//!     recall + themes + facts + contradictions bundle for agent context.
19//!   - `memory_update(memory_id, content)` — correct/supersede an active
20//!     episode's content and refresh its embedding/index row.
21//!   - `memory_forget(memory_id, reason?)` — soft-delete an episode.
22//!   - `memory_inspect(memory_id)` — return the full episode record.
23//!
24//! Derived-layer tools (v0.4.0+):
25//!   - `memory_themes(window_days?, limit?)` — list cluster themes.
26//!   - `memory_facts_about(subject, ...)` — query the structured-fact
27//!     knowledge graph (subject-predicate-object triples).
28//!   - `memory_entities(query, limit?)` — discover entity ids from the
29//!     structured-fact graph.
30//!   - `memory_contradictions(limit?)` — disagreements flagged during
31//!     consolidation.
32//!   - `memory_contradiction_resolve(...)` — mark a contradiction resolved,
33//!     unresolved, or reopened.
34//!
35//! Derived-layer tools (v0.5.0+):
36//!   - `memory_inspect_cluster(cluster_id, full_content?)` — drill
37//!     into one cluster's abstraction + source episodes (truncated).
38//!
39//! Document tools (v0.7.0+):
40//!   - `memory_ingest_document(path)` — read a file from disk, split it
41//!     into chunks, embed each, and store under documents/document_chunks.
42//!   - `memory_search_docs(query, limit?)` — vector search restricted to
43//!     document chunks; returns chunk content + parent-doc context.
44//!   - `memory_inspect_document(doc_id)` — show one document's metadata
45//!     plus a previewed list of its chunks.
46//!   - `memory_list_documents(limit?, offset?, include_forgotten?)` —
47//!     paginate over ingested documents, newest first.
48//!   - `memory_forget_document(doc_id)` — soft-delete a document; chunks
49//!     stop appearing in `memory_search_docs` and tombstone in HNSW.
50//!
51//! ## Transport
52//!
53//! `serve_stdio` wires the server to stdin/stdout for use as a subprocess
54//! ("`claude_desktop_config.json` or `~/.cursor/mcp.json` invokes
55//! `solo mcp-stdio`"). The function awaits a graceful shutdown when stdin
56//! closes (parent disconnects) — same lifecycle as `solo daemon`'s
57//! Ctrl+C path.
58//!
59//! ## What's deferred
60//!
61//! - SSE/HTTP transports — `rmcp` ships them, but v0.1 ships stdio only.
//! - `prompts/` and `resources/` capabilities — not needed for the
//!   current tool surface; ServerHandler defaults return empty lists.
64//! - Tool argument validation beyond JSON Schema typing — we trust rmcp
65//!   to deserialize per the schema, then serde-deserialize into our
66//!   typed param structs. Bad inputs surface as clear errors.
67
68use std::sync::Arc;
69
70use rmcp::handler::server::ServerHandler;
71use rmcp::model::{
72    CallToolRequestParams as CallToolRequestParam, CallToolResult, Content, Implementation,
73    InitializeRequestParams, InitializeResult, ListToolsResult,
74    PaginatedRequestParams as PaginatedRequestParam, ProtocolVersion, ServerCapabilities,
75    ServerInfo, Tool,
76};
77use rmcp::service::{RequestContext, RoleServer};
78use rmcp::{ErrorData as McpError, ServiceExt};
79use serde::{Deserialize, Serialize};
80use solo_core::{Confidence, DocumentId, EncodingContext, Episode, MemoryId, Tier};
81use solo_storage::{TenantHandle, TenantRegistry};
82use std::str::FromStr;
83
84/// The MCP server. Cheap to clone — every field is `Arc`-cloneable.
85///
86/// v0.8.0 P2: an MCP session resolves to **one tenant**. The session's
87/// `tenant_handle` is resolved at `initialize` time (today: from the
88/// CLI invocation via `solo mcp-stdio --tenant <id>`; future versions
89/// may resolve per-bearer-token via OIDC). Subsequent `tools/call`
90/// invocations route through the cached handle without re-resolving.
91/// Operators that need multi-tenant MCP spawn one `solo mcp-stdio`
92/// subprocess per tenant.
93#[derive(Clone)]
94pub struct SoloMcpServer {
95    inner: Arc<Inner>,
96}
97
98struct Inner {
99    /// Multi-tenant registry shared across all sessions. Held so that a
100    /// future MCP capability that lists/inspects other tenants has a
101    /// path to them (out of scope for v0.8.0 P2). P3 (auth) will use
102    /// this to re-resolve the tenant from a bearer-token claim.
103    #[allow(dead_code)]
104    registry: Arc<TenantRegistry>,
105    /// The tenant this MCP session speaks for. Resolved at session
106    /// construction time.
107    tenant: Arc<TenantHandle>,
108    /// Read-path aliases for the canonical `"user"` subject. Sourced
109    /// from `solo.config.toml` `[identity] user_aliases`; threaded
110    /// through to `solo_query::facts_about` so a query for `"alex"`
111    /// also surfaces rows historically extracted as `"user"`. Empty
112    /// vec = behave as today (no expansion).
113    user_aliases: Vec<String>,
114    /// v0.8.0 P4 audit-log principal for this MCP session. MCP is
115    /// bearer-only (no OIDC story in the spec), so the principal is
116    /// effectively `"bearer"` when the daemon was started with
117    /// `--bearer-token-file` and `None` otherwise. Persisted here so
118    /// every tool dispatch threads it into the audit emit without
119    /// reconstructing it per call.
120    audit_principal: Option<String>,
121}
122
/// v0.9.0 P2: what MCP `initialize` should do, given the tenant's
/// `[llm]` config and whether the peer advertised `sampling`.
///
/// The decision is kept apart from the steward-slot write so the gating
/// table can be unit-tested without a real `rmcp::Peer<RoleServer>`
/// (its constructors are private). `SoloMcpServer::initialize` matches
/// on this value and performs the side effect; tests pin the table
/// directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InitializeDecision {
    /// The LLM backend needs no MCP peer. The steward slot was already
    /// filled at registry-open time (or stays `None` for
    /// `LlmConfig::None`), so initialize succeeds without touching it.
    Allow,
    /// The backend is `mcp_sampling` and the peer advertised the
    /// `sampling` capability: `populate_sampling_steward` writes a
    /// peer-bound Steward into the slot.
    PopulateSamplingSteward,
    /// The backend is `mcp_sampling` but the peer did not advertise
    /// `sampling`: initialize must fail with the locked BLOCKER 2 error
    /// message.
    RejectMissingSamplingCapability,
}
147
148/// v0.9.0 P2: decide the initialize outcome given the tenant's
149/// `[llm]` config and whether the peer advertised the `sampling`
150/// capability.
151///
152/// Pure function — no side effects, no rmcp peer required. Pinned by
153/// `initialize_decision_*` tests.
154pub fn initialize_decision(
155    llm_settings: &Option<solo_storage::LlmSettings>,
156    peer_sampling_supported: bool,
157) -> InitializeDecision {
158    match llm_settings {
159        Some(settings) if settings.requires_mcp_peer() => {
160            if peer_sampling_supported {
161                InitializeDecision::PopulateSamplingSteward
162            } else {
163                InitializeDecision::RejectMissingSamplingCapability
164            }
165        }
166        _ => InitializeDecision::Allow,
167    }
168}
169
/// v0.9.0 P2: the locked error text used by both the daemon-startup
/// rejection guard and the MCP `initialize` capability gate (plan §3
/// Decision 4 / BLOCKER 2 resolution).
///
/// The text is handed to the operator verbatim so the TOML snippets can
/// be copy-pasted. It lives at module scope so the daemon startup path
/// (in `solo-cli`) and `SoloMcpServer::initialize` share one copy
/// instead of two that could drift apart.
///
/// Returns the message as newline-joined lines with no trailing
/// newline.
pub fn sampling_capability_missing_error_message() -> String {
    // Explanation shown before the alternatives, blank separators included.
    const PREAMBLE: [&str; 7] = [
        "LLM backend `mcp_sampling` requires a connected MCP client that",
        "advertises the `sampling` capability at initialize. Either the",
        "current MCP client does not support sampling, or this Solo",
        "process is running in daemon-only mode (no peer to call back).",
        "",
        "Pick one of:",
        "",
    ];
    // One copy-pasteable `[llm]` block per alternative backend.
    const ALTERNATIVES: [&[&str]; 4] = [
        &[
            "  # Anthropic (hosted):",
            "  [llm]",
            "  mode = \"anthropic\"",
            "  api_key_env = \"ANTHROPIC_API_KEY\"",
            "  model = \"claude-sonnet-4-6\"",
        ],
        &[
            "  # OpenAI (hosted):",
            "  [llm]",
            "  mode = \"openai\"",
            "  api_key_env = \"OPENAI_API_KEY\"",
            "  model = \"gpt-5o\"",
        ],
        &[
            "  # Ollama (local daemon):",
            "  [llm]",
            "  mode = \"ollama\"",
            "  base_url = \"http://localhost:11434\"",
            "  model = \"qwen3-coder:30b\"",
        ],
        &[
            "  # None (cluster-only; abstractions skipped):",
            "  [llm]",
            "  mode = \"none\"",
        ],
    ];
    const POINTER: &str =
        "See docs/releases/v0.9.0.md \u{00a7}LLM-backend selection for details.";

    let mut lines: Vec<&str> = Vec::with_capacity(30);
    lines.extend_from_slice(&PREAMBLE);
    for snippet in &ALTERNATIVES {
        lines.extend_from_slice(snippet);
        // Every snippet is followed by one blank separator line.
        lines.push("");
    }
    lines.push(POINTER);
    lines.join("\n")
}
214
/// v0.8.1 P2: name of the environment variable an MCP client sets when
/// spawning the server so that audit rows on the stdio transport carry
/// a principal. This closes the v0.8.0 known issue where MCP audit rows
/// on the daemon path always had `principal_subject = NULL`.
///
/// Intended precedence once an HTTP-MCP transport exists:
///   1. the `Authorization: Bearer <token>` header of the HTTP-MCP
///      request (resolved through the `AuthConfig::Bearer` validator);
///   2. `SOLO_MCP_PRINCIPAL_TOKEN` in the spawned process's environment.
///
/// In the stdio-only v0.8.x world only the env-var source applies; no
/// HTTP transport is wired, so the header source is never taken.
/// Exposed as a constant so the CLI subcommand and tests can refer to
/// it by name instead of repeating the string literal.
pub const ENV_MCP_PRINCIPAL_TOKEN: &str = "SOLO_MCP_PRINCIPAL_TOKEN";
230
231/// v0.8.1 P2: resolve the MCP-session principal at `initialize`-time.
232///
233/// Reads `SOLO_MCP_PRINCIPAL_TOKEN` env var (stdio path); future HTTP-MCP
234/// callers will pass the bearer header value in via the explicit
235/// `header_value` arg. The header beats the env when both are present.
236///
237/// Returns `Some(subject)` on resolution success; `None` when neither
238/// source carries a non-empty value. Empty / whitespace-only values are
239/// treated as absent so an accidentally-set `SOLO_MCP_PRINCIPAL_TOKEN=""`
240/// in a launcher script doesn't pin every audit row to a blank principal.
241///
242/// The current implementation treats the env var value as the principal
243/// subject directly. A future hardening pass can validate against the
244/// daemon's `[auth] bearer.token` config to refuse mismatched tokens —
245/// today the env var is operator-trusted (same trust model as
246/// `SOLO_PASSPHRASE`).
247pub fn resolve_mcp_principal(header_value: Option<&str>) -> Option<String> {
248    // HTTP-MCP path wins when configured.
249    if let Some(h) = header_value {
250        if let Some(token) = h.strip_prefix("Bearer ") {
251            let trimmed = token.trim();
252            if !trimmed.is_empty() {
253                // Header carries the raw bearer token. Same shape as the
254                // stdio env-var path: the *value* is the principal
255                // subject in v0.8.1; v0.8.2+ may validate against a
256                // configured token set and surface the JWT `sub` claim
257                // instead.
258                return Some(trimmed.to_string());
259            }
260        }
261    }
262    // Stdio env-var fallback.
263    match std::env::var(ENV_MCP_PRINCIPAL_TOKEN) {
264        Ok(v) => {
265            let trimmed = v.trim();
266            if trimmed.is_empty() {
267                None
268            } else {
269                Some(trimmed.to_string())
270            }
271        }
272        Err(_) => None,
273    }
274}
275
276impl SoloMcpServer {
277    /// Build a server speaking for `tenant` (v0.8.0 P2 — one MCP session
278    /// ↔ one tenant). The registry is held so future capabilities can
279    /// reach across tenants if needed; today every handler routes
280    /// through `self.inner.tenant`.
281    ///
282    /// v0.8.1 P2: auto-resolves the audit principal from the
283    /// `SOLO_MCP_PRINCIPAL_TOKEN` env var (see [`resolve_mcp_principal`]).
284    /// When neither the env var nor a header is set, the principal stays
285    /// `None` — preserving v0.8.0 behavior for single-user setups.
286    pub fn new_for_tenant(
287        registry: Arc<TenantRegistry>,
288        tenant: Arc<TenantHandle>,
289        user_aliases: Vec<String>,
290    ) -> Self {
291        let principal = resolve_mcp_principal(None);
292        Self::new_for_tenant_with_principal(registry, tenant, user_aliases, principal)
293    }
294
295    /// v0.8.0 P4: like [`Self::new_for_tenant`], but records an explicit
296    /// audit principal subject for every tool dispatch. MCP is
297    /// bearer-only at v0.8.0 — the orchestration layer (today: the
298    /// daemon's `--bearer-token-file` path) decides whether a session
299    /// counts as "bearer-authenticated" and passes `Some("bearer")`;
300    /// CLI / unauth paths pass `None`.
301    ///
302    /// v0.8.1 P2: when the caller passes `audit_principal = None`, the
303    /// env-var auto-resolution still runs (in `new_for_tenant`). Callers
304    /// who want to *explicitly* suppress env-var resolution can call
305    /// this method with `None` after `std::env::remove_var(...)`, or use
306    /// the dedicated test constructor that bypasses env reads.
307    pub fn new_for_tenant_with_principal(
308        registry: Arc<TenantRegistry>,
309        tenant: Arc<TenantHandle>,
310        user_aliases: Vec<String>,
311        audit_principal: Option<String>,
312    ) -> Self {
313        Self {
314            inner: Arc::new(Inner {
315                registry,
316                tenant,
317                user_aliases,
318                audit_principal,
319            }),
320        }
321    }
322}
323
324/// Convenience: run the server over stdio and await its termination.
325/// Returns when stdin closes (parent disconnect) or the runtime exits.
326pub async fn serve_stdio(server: SoloMcpServer) -> anyhow::Result<()> {
327    use rmcp::transport::io::stdio;
328    let (stdin, stdout) = stdio();
329    let running = server.serve((stdin, stdout)).await?;
330    running.waiting().await?;
331    Ok(())
332}
333
334// ---------------------------------------------------------------------------
335// Tool argument schemas
336// ---------------------------------------------------------------------------
337
338#[derive(Debug, Clone, Serialize, Deserialize)]
339pub struct RememberArgs {
340    pub content: String,
341    #[serde(default)]
342    pub source_type: Option<String>,
343    #[serde(default)]
344    pub source_id: Option<String>,
345    /// v0.9.2 — optional salience in [0.0, 1.0]. `None` → 0.5 (preserves
346    /// pre-v0.9.2 behaviour). Out-of-range values are rejected by
347    /// [`Self::validate_salience`] before reaching the writer.
348    #[serde(default)]
349    pub salience: Option<f32>,
350}
351
352/// v0.9.2 — one item in a `memory_remember_batch` request.
353///
354/// Mirrors [`RememberArgs`] field-for-field minus the wrapper-tool
355/// invariant: callers pass an array of these inside [`RememberBatchArgs`].
356/// All items in a batch are persisted in a single `BEGIN IMMEDIATE`
357/// transaction (per dev-log 0120 §3 Decision A) so partial-failure
358/// scenarios are impossible from the client's perspective — either
359/// every item lands or none do.
360#[derive(Debug, Clone, Serialize, Deserialize)]
361pub struct RememberItem {
362    pub content: String,
363    #[serde(default)]
364    pub source_type: Option<String>,
365    #[serde(default)]
366    pub source_id: Option<String>,
367    /// Optional salience in [0.0, 1.0]; `None` → 0.5. See
368    /// [`RememberArgs::salience`].
369    #[serde(default)]
370    pub salience: Option<f32>,
371}
372
373/// v0.9.2 — args for the new `memory_remember_batch` MCP tool.
374///
375/// Wraps `Vec<RememberItem>`. The handler validates `items.is_empty()`
376/// and `items.len() > MAX_REMEMBER_BATCH_SIZE` before any embedding
377/// work; per-item content/salience is validated immediately afterwards.
378#[derive(Debug, Clone, Serialize, Deserialize)]
379pub struct RememberBatchArgs {
380    pub items: Vec<RememberItem>,
381}
382
383/// Validate that an optional salience value is well-formed (NaN-free
384/// and inside `[0.0, 1.0]`). Centralised so both `memory_remember` and
385/// `memory_remember_batch` share the same rejection shape.
386fn validate_salience(salience: Option<f32>) -> std::result::Result<(), McpError> {
387    if let Some(s) = salience {
388        if !s.is_finite() || !(0.0..=1.0).contains(&s) {
389            return Err(McpError::invalid_params(
390                format!("salience must be in [0.0, 1.0]; got {s}"),
391                None,
392            ));
393        }
394    }
395    Ok(())
396}
397
398#[derive(Debug, Clone, Serialize, Deserialize)]
399pub struct RecallArgs {
400    pub query: String,
401    #[serde(default = "default_limit")]
402    pub limit: usize,
403}
404
405#[derive(Debug, Clone, Serialize, Deserialize)]
406pub struct MemoryContextArgs {
407    pub query: String,
408    #[serde(default)]
409    pub subject: Option<String>,
410    #[serde(default)]
411    pub window_days: Option<i64>,
412    #[serde(default = "default_limit")]
413    pub limit: usize,
414}
415
/// Serde default for the `limit` field of the recall, context, themes,
/// facts, entities, and contradictions argument structs.
fn default_limit() -> usize {
    const DEFAULT_LIMIT: usize = 5;
    DEFAULT_LIMIT
}
419
420#[derive(Debug, Clone, Serialize, Deserialize)]
421pub struct ForgetArgs {
422    pub memory_id: String,
423    #[serde(default = "default_forget_reason")]
424    pub reason: String,
425}
426
/// Serde default for [`ForgetArgs::reason`].
fn default_forget_reason() -> String {
    String::from("user-initiated via MCP")
}
430
431#[derive(Debug, Clone, Serialize, Deserialize)]
432pub struct InspectArgs {
433    pub memory_id: String,
434}
435
436#[derive(Debug, Clone, Serialize, Deserialize)]
437pub struct UpdateArgs {
438    pub memory_id: String,
439    pub content: String,
440}
441
442// Path 1 derived-layer tools (v0.4.0+) — query the Steward's outputs.
443// `solo_query::derived` is the single source of truth; these handlers
444// just translate JSON args to function args and serialise the result
445// vec to JSON for the MCP wire.
446
447#[derive(Debug, Clone, Serialize, Deserialize)]
448pub struct ThemesArgs {
449    /// Optional time window in days; `None` = unfiltered, return up
450    /// to `limit` most-recent themes across all time. `Some(7)` =
451    /// "themes from the last week".
452    #[serde(default)]
453    pub window_days: Option<i64>,
454    #[serde(default = "default_limit")]
455    pub limit: usize,
456}
457
458#[derive(Debug, Clone, Serialize, Deserialize)]
459pub struct FactsAboutArgs {
460    /// Subject id to query — required (predicate-only scans
461    /// intentionally not supported).
462    pub subject: String,
463    #[serde(default)]
464    pub predicate: Option<String>,
465    #[serde(default)]
466    pub since_ms: Option<i64>,
467    #[serde(default)]
468    pub until_ms: Option<i64>,
469    /// v0.5.1 Priority 8 — widen the query to also match rows where
470    /// `subject` appears as the object (e.g. surface "Sam pushes back
471    /// on PRs about Maya" under `facts_about(subject="maya")`).
472    /// Default `false` preserves v0.5.0 behaviour.
473    #[serde(default)]
474    pub include_as_object: bool,
475    #[serde(default = "default_limit")]
476    pub limit: usize,
477}
478
479#[derive(Debug, Clone, Serialize, Deserialize)]
480pub struct EntitiesArgs {
481    pub query: String,
482    #[serde(default = "default_limit")]
483    pub limit: usize,
484}
485
486#[derive(Debug, Clone, Serialize, Deserialize)]
487pub struct ContradictionsArgs {
488    #[serde(default = "default_limit")]
489    pub limit: usize,
490}
491
/// Serde default for [`ContradictionResolveArgs::status`].
fn default_contradiction_status() -> String {
    String::from("resolved")
}
495
496#[derive(Debug, Clone, Serialize, Deserialize)]
497pub struct ContradictionResolveArgs {
498    pub a_id: String,
499    pub b_id: String,
500    pub kind: String,
501    #[serde(default = "default_contradiction_status")]
502    pub status: String,
503    #[serde(default)]
504    pub resolution_note: Option<String>,
505    #[serde(default)]
506    pub winning_triple_id: Option<String>,
507}
508
509/// Args for `memory_inspect_cluster` (v0.5.0 Priority 3). `cluster_id`
510/// is required; `full_content` is opt-in for the rare power-user case
511/// where 200-char-per-episode truncation is too aggressive.
512#[derive(Debug, Clone, Serialize, Deserialize)]
513pub struct InspectClusterArgs {
514    pub cluster_id: String,
515    /// If `true`, episode `content` fields are returned verbatim. If
516    /// `false` or omitted (the default), each episode's content is
517    /// truncated to `solo_query::EPISODE_TRUNCATE_CHARS` chars with a
518    /// trailing `…`.
519    #[serde(default)]
520    pub full_content: bool,
521}
522
523// Document tools (v0.7.0+). Five args structs paired with five handlers.
524// Wire shapes per `docs/dev-log/0083-v0.7.0-implementation-plan.md` §2 P5.
525
526#[derive(Debug, Clone, Serialize, Deserialize)]
527pub struct IngestDocumentArgs {
528    /// Server-side filesystem path to the file to ingest. Must be
529    /// readable by the Solo process. The writer parses the file by
530    /// extension, splits it into ~500-token chunks, embeds each, and
531    /// stores them under `documents` + `document_chunks`.
532    pub path: String,
533}
534
535#[derive(Debug, Clone, Serialize, Deserialize)]
536pub struct SearchDocsArgs {
537    pub query: String,
538    #[serde(default = "default_search_docs_limit")]
539    pub limit: usize,
540}
541
/// Serde default for [`SearchDocsArgs::limit`].
fn default_search_docs_limit() -> usize {
    const DEFAULT_SEARCH_DOCS_LIMIT: usize = 5;
    DEFAULT_SEARCH_DOCS_LIMIT
}
545
546#[derive(Debug, Clone, Serialize, Deserialize)]
547pub struct InspectDocumentArgs {
548    pub doc_id: String,
549}
550
551#[derive(Debug, Clone, Serialize, Deserialize)]
552pub struct ListDocumentsArgs {
553    #[serde(default = "default_list_documents_limit")]
554    pub limit: usize,
555    #[serde(default)]
556    pub offset: usize,
557    /// If `true`, also include documents the user has forgotten. Default
558    /// `false` matches the agent-UX expectation that recall + listing
559    /// ignore soft-deleted rows.
560    #[serde(default)]
561    pub include_forgotten: bool,
562}
563
/// Serde default for [`ListDocumentsArgs::limit`].
fn default_list_documents_limit() -> usize {
    const DEFAULT_PAGE_SIZE: usize = 20;
    DEFAULT_PAGE_SIZE
}
567
568#[derive(Debug, Clone, Serialize, Deserialize)]
569pub struct ForgetDocumentArgs {
570    pub doc_id: String,
571}
572
573// ---------------------------------------------------------------------------
574// ServerHandler implementation
575// ---------------------------------------------------------------------------
576
577impl ServerHandler for SoloMcpServer {
578    fn get_info(&self) -> ServerInfo {
579        // rmcp 1.x: ServerInfo is non-exhaustive AND lives in another crate,
580        // so neither struct-literal nor functional-update syntax (..) is
581        // allowed from outside. Build via mut on a Default::default().
582        let capabilities = ServerCapabilities::builder().enable_tools().build();
583        let mut info = ServerInfo::default();
584        info.protocol_version = ProtocolVersion::default();
585        info.capabilities = capabilities;
586        // v0.9.1 P1 Fix 1 — `Implementation::from_build_env()` reads
587        // `CARGO_PKG_NAME` + `CARGO_PKG_VERSION` from rmcp's OWN build
588        // environment (the helper lives in rmcp, so the proc-macro
589        // expansion captures rmcp's manifest, not ours). On v0.9.0 every
590        // Solo MCP daemon self-identified as `{name: "rmcp", version: "1.7.0"}`.
591        // Pinned by `tests::server_info_identity_is_solo_not_rmcp_or_solo_api`.
592        // The literal `"solo"` (not `env!("CARGO_PKG_NAME")`) is deliberate:
593        // this crate is `solo-api`, but the operator-facing identity is
594        // the binary name `solo`.
595        info.server_info =
596            Implementation::new("solo".to_string(), env!("CARGO_PKG_VERSION").to_string());
597        info.instructions = Some(
598            "Solo gives you persistent memory across conversations \
599                 with this user — what they've told you before, the \
600                 people and projects in their life, and where their \
601                 stated beliefs have shifted, plus a library of \
602                 documents the user has ingested (notes, runbooks, \
603                 PDFs). Reach for these tools whenever the user \
604                 references something from earlier (\"like I \
605                 mentioned\", \"the project I'm working on\", \"my \
606                 friend Alex\", \"the notes I uploaded last week\") \
607                 or asks a question that hinges on personal context \
608                 or document content you don't have in the current \
609                 chat. \
610                 \n\nBest first call for agent work: memory_context \
611                 (one bounded bundle containing recall, themes, \
612                 optional facts, and contradictions). Use the \
613                 narrower tools below when you need more detail or \
614                 a specific operation. \
615                 \n\nTools to write or look up specific moments: \
616                 memory_remember (save something worth keeping), \
617                 memory_update (correct one active saved item), \
618                 memory_recall (search past conversations by topic), \
619                 memory_inspect (show one saved item by id), \
620                 memory_forget (delete one saved item). \
621                 \n\nTools for the bigger picture (populated as the \
622                 user uses Solo over time): memory_themes (recent \
623                 topics they've been thinking about), \
624                 memory_facts_about (what you know about a person, \
625                 project, or place — \"what do you know about \
626                 Alex?\"), memory_entities (discover graph entity \
627                 ids by name), memory_contradictions (places where the \
628                 user has said two things that disagree — surface \
629                 these before answering), memory_contradiction_resolve \
630                 (mark a contradiction resolved or reopened), \
631                 memory_inspect_cluster \
632                 (the raw conversations behind one summary). \
633                 \n\nTools for the user's documents: \
634                 memory_ingest_document (read a file from disk and \
635                 add it to Solo's library), memory_search_docs \
636                 (search across ingested documents by topic — use \
637                 when the user asks about something they wrote down \
638                 or saved as a file), memory_inspect_document (show \
639                 one document's metadata plus a preview of its \
640                 chunks), memory_list_documents (browse documents \
641                 by recency), memory_forget_document (drop a \
642                 document from the library)."
643                .into(),
644        );
645        info
646    }
647
648    /// v0.9.0 P2: override `initialize` so we can:
649    ///
650    ///   1. Cache the client's `InitializeRequestParams` on the peer
651    ///      (delegates to rmcp's default for this).
652    ///   2. If the tenant's `[llm] mode = "mcp_sampling"`:
653    ///      a. Refuse to initialize when the peer didn't advertise the
654    ///         `sampling` capability — surfaces the BLOCKER 2-locked
655    ///         error message so the user sees commented-out
656    ///         alternative TOML blocks.
657    ///      b. Otherwise build a `SamplingLlmClient`-backed Steward and
658    ///         write it into `tenant.steward_slot()` so the writer
659    ///         actor's next consolidate-tick reads a populated slot.
660    ///   3. For any other `[llm]` mode, return the configured tools
661    ///      surface unchanged (the slot was eagerly populated at
662    ///      registry-open time by the static StewardFactory).
663    async fn initialize(
664        &self,
665        request: InitializeRequestParams,
666        context: RequestContext<RoleServer>,
667    ) -> std::result::Result<InitializeResult, McpError> {
668        // Defer to rmcp's default for peer-info caching (matches the
669        // `if peer_info().is_none()` shape).
670        if context.peer.peer_info().is_none() {
671            context.peer.set_peer_info(request.clone());
672        }
673
674        let llm_settings = self.inner.tenant.config().llm.as_ref().cloned();
675        let peer_sampling_supported = request.capabilities.sampling.is_some();
676        match initialize_decision(&llm_settings, peer_sampling_supported) {
677            InitializeDecision::Allow => {}
678            InitializeDecision::PopulateSamplingSteward => {
679                // Build the sampling-backed Steward against the live
680                // peer + the per-tenant write handle, then write it
681                // into the slot.
682                self.populate_sampling_steward(&context).await;
683            }
684            InitializeDecision::RejectMissingSamplingCapability => {
685                return Err(McpError::invalid_request(
686                    sampling_capability_missing_error_message(),
687                    None,
688                ));
689            }
690        }
691
692        Ok(self.get_info())
693    }
694
695    async fn list_tools(
696        &self,
697        _request: Option<PaginatedRequestParam>,
698        _context: RequestContext<RoleServer>,
699    ) -> std::result::Result<ListToolsResult, McpError> {
700        Ok(ListToolsResult {
701            tools: build_tools(),
702            next_cursor: None,
703            ..Default::default()
704        })
705    }
706
707    async fn call_tool(
708        &self,
709        request: CallToolRequestParam,
710        _context: RequestContext<RoleServer>,
711    ) -> std::result::Result<CallToolResult, McpError> {
712        let CallToolRequestParam {
713            name, arguments, ..
714        } = request;
715        let args_value = serde_json::Value::Object(arguments.unwrap_or_default());
716        // v0.11.0 P3: stdio transport has no per-session broadcast
717        // channel to publish progress events through (one process =
718        // one tenant = one implicit "session" for the subprocess's
719        // lifetime). Pass `None` — handlers see it and skip the
720        // emission code paths silently.
721        self.dispatch_tool(&name, args_value, None).await
722    }
723}
724
725impl SoloMcpServer {
726    /// v0.9.0 P2: build a sampling-backed `Arc<Steward>` for the
727    /// current MCP session and write it into the tenant's
728    /// `steward_slot`. Called from [`Self::initialize`] when:
729    ///
730    ///   * `tenant.config().llm.requires_mcp_peer()` is true, AND
731    ///   * the peer advertised the `sampling` capability.
732    ///
733    /// Implementation notes:
734    ///
735    ///   * `StewardConfig::from_settings_then_env()` is parsed best-
736    ///     effort against the tenant's `SoloConfig.steward` block (the
737    ///     v0.11.1 TOML surface) AND env vars; if either is malformed,
738    ///     we fall back to `default()` and log a warning. This matches
739    ///     `daemon.rs`'s tolerance — a bad config shouldn't block an
740    ///     MCP session from initialising.
741    ///
742    ///   * The slot is OVERWRITTEN unconditionally — a fresh MCP
743    ///     session always wins. If a prior session's
744    ///     `SamplingLlmClient` had outstanding requests, they error out
745    ///     on the rmcp layer when their peer drops.
746    ///
747    ///   * The cached `audit_principal` is the one the MCP server
748    ///     constructed for this session via `resolve_mcp_principal`.
749    ///     Every `peer.create_message` call from this Steward routes
750    ///     that principal through to the per-tenant
751    ///     `AuditOperation::LlmSamplingCall` row.
752    async fn populate_sampling_steward(&self, context: &RequestContext<RoleServer>) {
753        // v0.11.1: read `[steward]` TOML overrides from the tenant's
754        // already-parsed `SoloConfig` and layer env vars on top, the
755        // same resolution order as `daemon.rs` + `common.rs`. Best-
756        // effort: a malformed value falls back to defaults rather than
757        // blocking session init.
758        let tenant_cfg = self.inner.tenant.config();
759        let steward_config = solo_steward::StewardConfig::from_settings_then_env(
760            tenant_cfg.steward.cluster_min_size,
761            tenant_cfg.steward.cluster_cosine_threshold,
762        )
763        .unwrap_or_else(|e| {
764            tracing::warn!(
765                error = %e,
766                "v0.11.1: StewardConfig::from_settings_then_env failed at MCP \
767                 initialize; falling back to defaults"
768            );
769            solo_steward::StewardConfig::default()
770        });
771        // v0.9.0 P5 (M3 wiring): read `[sampling]` from the tenant's
772        // already-parsed `SoloConfig`. `SamplingConfig::default()` lands
773        // when the block is omitted (5s window / 10 max-batch); operator
774        // overrides flow through to `build_sampling_steward` and into
775        // `SamplingCoordinator::with_settings`.
776        let sampling_config = self.inner.tenant.config().sampling.clone();
777        let peer = context.peer.clone();
778        let write_handle = self.inner.tenant.write().clone();
779        let steward = crate::llm::build_sampling_steward(
780            peer,
781            write_handle,
782            self.inner.audit_principal.clone(),
783            steward_config,
784            sampling_config.clone(),
785        );
786        let slot = self.inner.tenant.steward_slot();
787        let mut guard = slot.write().await;
788        *guard = Some(steward);
789        tracing::info!(
790            tenant = %self.inner.tenant.tenant_id(),
791            coalesce_window_ms = sampling_config.coalesce_window_ms,
792            coalesce_max_requests = sampling_config.coalesce_max_requests,
793            "v0.9.0 P5: MCP-sampling Steward attached to tenant.steward_slot \
794             (PeerSamplingClient → SamplingCoordinator → SamplingLlmClient)"
795        );
796    }
797
798    /// Direct tool-dispatch path used by both `call_tool` (the
799    /// ServerHandler trait method, behind the rmcp protocol layer) and
800    /// in-process tests that don't want to spin up a full transport pair.
801    /// Bypasses `RequestContext` (which requires a `Peer` not constructible
802    /// outside rmcp internals).
803    ///
804    /// v0.11.0 P3: `progress` is `Some` only when the HTTP transport
805    /// dispatched the request AND the client opted in via
806    /// `_meta.progressToken`. The three long-running handlers
807    /// (`memory_ingest_document`, `memory_search_docs`,
808    /// `memory_remember_batch`) consult the reporter; the other
809    /// 11 handlers ignore it (backward compat with stdio and with
810    /// HTTP clients that did not opt in).
811    pub async fn dispatch_tool(
812        &self,
813        name: &str,
814        args_value: serde_json::Value,
815        progress: Option<crate::mcp_progress::ProgressReporter>,
816    ) -> std::result::Result<CallToolResult, McpError> {
817        match name {
818            "memory_remember" => {
819                let args: RememberArgs = parse_args(&args_value)?;
820                self.handle_remember(args).await
821            }
822            "memory_remember_batch" => {
823                let args: RememberBatchArgs = parse_args(&args_value)?;
824                self.handle_remember_batch(args, progress).await
825            }
826            "memory_recall" => {
827                let args: RecallArgs = parse_args(&args_value)?;
828                self.handle_recall(args).await
829            }
830            "memory_context" => {
831                let args: MemoryContextArgs = parse_args(&args_value)?;
832                self.handle_memory_context(args).await
833            }
834            "memory_forget" => {
835                let args: ForgetArgs = parse_args(&args_value)?;
836                self.handle_forget(args).await
837            }
838            "memory_inspect" => {
839                let args: InspectArgs = parse_args(&args_value)?;
840                self.handle_inspect(args).await
841            }
842            "memory_update" => {
843                let args: UpdateArgs = parse_args(&args_value)?;
844                self.handle_update(args).await
845            }
846            "memory_themes" => {
847                let args: ThemesArgs = parse_args(&args_value)?;
848                self.handle_themes(args).await
849            }
850            "memory_facts_about" => {
851                let args: FactsAboutArgs = parse_args(&args_value)?;
852                self.handle_facts_about(args).await
853            }
854            "memory_entities" => {
855                let args: EntitiesArgs = parse_args(&args_value)?;
856                self.handle_entities(args).await
857            }
858            "memory_contradictions" => {
859                let args: ContradictionsArgs = parse_args(&args_value)?;
860                self.handle_contradictions(args).await
861            }
862            "memory_contradiction_resolve" => {
863                let args: ContradictionResolveArgs = parse_args(&args_value)?;
864                self.handle_contradiction_resolve(args).await
865            }
866            "memory_inspect_cluster" => {
867                let args: InspectClusterArgs = parse_args(&args_value)?;
868                self.handle_inspect_cluster(args).await
869            }
870            "memory_ingest_document" => {
871                let args: IngestDocumentArgs = parse_args(&args_value)?;
872                self.handle_ingest_document(args, progress).await
873            }
874            "memory_search_docs" => {
875                let args: SearchDocsArgs = parse_args(&args_value)?;
876                self.handle_search_docs(args, progress).await
877            }
878            "memory_inspect_document" => {
879                let args: InspectDocumentArgs = parse_args(&args_value)?;
880                self.handle_inspect_document(args).await
881            }
882            "memory_list_documents" => {
883                let args: ListDocumentsArgs = parse_args(&args_value)?;
884                self.handle_list_documents(args).await
885            }
886            "memory_forget_document" => {
887                let args: ForgetDocumentArgs = parse_args(&args_value)?;
888                self.handle_forget_document(args).await
889            }
890            other => Err(McpError::invalid_params(
891                format!("unknown tool `{other}`"),
892                None,
893            )),
894        }
895    }
896
897    /// List the tools this server exposes. Mirrors `ServerHandler::list_tools`
898    /// without requiring a RequestContext.
899    pub fn dispatch_list_tools(&self) -> Vec<Tool> {
900        build_tools()
901    }
902}
903
904fn parse_args<T: serde::de::DeserializeOwned>(
905    v: &serde_json::Value,
906) -> std::result::Result<T, McpError> {
907    serde_json::from_value(v.clone())
908        .map_err(|e| McpError::invalid_params(format!("invalid tool arguments: {e}"), None))
909}
910
911fn solo_to_mcp(e: solo_core::Error) -> McpError {
912    use solo_core::Error;
913    match e {
914        Error::NotFound(msg) => McpError::invalid_params(msg, None),
915        Error::InvalidInput(msg) => McpError::invalid_params(msg, None),
916        Error::Conflict(msg) => McpError::invalid_params(msg, None),
917        other => McpError::internal_error(other.to_string(), None),
918    }
919}
920
921// ---------------------------------------------------------------------------
922// Tool definitions (JSON Schema)
923// ---------------------------------------------------------------------------
924
925fn build_tools() -> Vec<Tool> {
926    vec![
927        Tool::new(
928            "memory_remember",
929            "Save something the user has told you — a fact, a \
930             preference, a name, a date, a context — so you can pick \
931             it up next conversation. Use whenever the user mentions \
932             something they'd reasonably expect you to recall later \
933             (\"I just started at Quotient\", \"my partner is Maya\"). \
934             Returns the saved item's id.",
935            json_schema_object(serde_json::json!({
936                "type": "object",
937                "properties": {
938                    "content": {
939                        "type": "string",
940                        "description": "The text to remember.",
941                    },
942                    "source_type": {
943                        "type": "string",
944                        "description": "Optional source-type tag (default: \"user_message\"). See docs/mcp/source-types.md for convention values.",
945                    },
946                    "source_id": {
947                        "type": "string",
948                        "description": "Optional upstream id for traceability.",
949                    },
950                    "salience": {
951                        "type": "number",
952                        "description": "Optional salience in [0.0, 1.0]; defaults to 0.5. Higher values bias toward recall ranking + retention. v0.9.2+.",
953                        "minimum": 0.0,
954                        "maximum": 1.0,
955                    },
956                },
957                "required": ["content"],
958            })),
959        ),
960        // v0.9.2 — atomic batched-remember for agentic clients. Wraps
961        // every item in one BEGIN IMMEDIATE tx so a single
962        // `memory_remember_batch` call either persists all N items or
963        // none. Designed for the solo-jarvis turn-flush pattern (per
964        // dev-log 0120 §1).
965        Tool::new(
966            "memory_remember_batch",
967            "Save several items atomically in one transaction — either \
968             every item lands or none does. Use this when you have a \
969             collection of related episodes from one logical step (a \
970             conversation turn, a tool-output bundle, an ingest batch) \
971             and partial success would leave the user's memory in a \
972             confusing half-state. Each item carries the same fields as \
973             memory_remember (content + optional source_type, source_id, \
974             salience). Returns an ordered array of memory_ids matching \
975             the input items. v0.9.2+.",
976            json_schema_object(serde_json::json!({
977                "type": "object",
978                "properties": {
979                    "items": {
980                        "type": "array",
981                        "description": "Items to remember atomically. Max 200 per call.",
982                        "minItems": 1,
983                        "maxItems": 200,
984                        "items": {
985                            "type": "object",
986                            "properties": {
987                                "content": {
988                                    "type": "string",
989                                    "description": "The text to remember.",
990                                },
991                                "source_type": {
992                                    "type": "string",
993                                    "description": "Optional source-type tag (default: \"user_message\"). See docs/mcp/source-types.md.",
994                                },
995                                "source_id": {
996                                    "type": "string",
997                                    "description": "Optional upstream id for traceability.",
998                                },
999                                "salience": {
1000                                    "type": "number",
1001                                    "description": "Optional salience in [0.0, 1.0]; defaults to 0.5.",
1002                                    "minimum": 0.0,
1003                                    "maximum": 1.0,
1004                                },
1005                            },
1006                            "required": ["content"],
1007                        },
1008                    },
1009                },
1010                "required": ["items"],
1011            })),
1012        ),
1013        Tool::new(
1014            "memory_recall",
1015            "Search past conversations with this user by topic or \
1016             phrase. Returns up to `limit` of the closest matches, \
1017             best match first. Use when the user references \
1018             something they said before (\"that book I told you \
1019             about\", \"the bug we were debugging last week\"). \
1020             Skips items the user has deleted.",
1021            json_schema_object(serde_json::json!({
1022                "type": "object",
1023                "properties": {
1024                    "query": {
1025                        "type": "string",
1026                        "description": "The query text.",
1027                    },
1028                    "limit": {
1029                        "type": "integer",
1030                        "description": "Maximum results (default 5).",
1031                        "minimum": 1,
1032                        "maximum": 100,
1033                    },
1034                },
1035                "required": ["query"],
1036            })),
1037        ),
1038        Tool::new(
1039            "memory_context",
1040            "Build a compact working-memory bundle for an agent turn. \
1041             Use this near the start of a substantial answer or task \
1042             when remembered context may matter. It combines raw \
1043             episodic recall, recent themes, optional structured facts \
1044             about `subject`, and known contradictions so clients can \
1045             ground answers without making four separate calls.",
1046            json_schema_object(serde_json::json!({
1047                "type": "object",
1048                "properties": {
1049                    "query": {
1050                        "type": "string",
1051                        "description": "Natural-language query for episodic recall.",
1052                    },
1053                    "subject": {
1054                        "type": "string",
1055                        "description": "Optional subject for structured facts. When present, facts also match object-position references.",
1056                    },
1057                    "window_days": {
1058                        "type": "integer",
1059                        "description": "Optional recency window in days for themes. Omit for unfiltered.",
1060                        "minimum": 1,
1061                    },
1062                    "limit": {
1063                        "type": "integer",
1064                        "description": "Per-section maximum results (default 5).",
1065                        "minimum": 1,
1066                        "maximum": 100,
1067                    },
1068                },
1069                "required": ["query"],
1070            })),
1071        ),
1072        Tool::new(
1073            "memory_forget",
1074            "Delete one saved item by id. Use when the user asks you \
1075             to forget something specific (\"forget that I said \
1076             X\"). The item stops appearing in future recalls. \
1077             Reversible only via backups.",
1078            json_schema_object(serde_json::json!({
1079                "type": "object",
1080                "properties": {
1081                    "memory_id": {
1082                        "type": "string",
1083                        "description": "MemoryId to forget (UUID v7).",
1084                    },
1085                    "reason": {
1086                        "type": "string",
1087                        "description": "Optional free-form reason (logged, not yet persisted).",
1088                    },
1089                },
1090                "required": ["memory_id"],
1091            })),
1092        ),
1093        Tool::new(
1094            "memory_inspect",
1095            "Show the full record for one saved item — when it was \
1096             saved, where it came from, and the full text. Use after \
1097             memory_recall when you want the complete content of a \
1098             specific hit (recall results may be truncated).",
1099            json_schema_object(serde_json::json!({
1100                "type": "object",
1101                "properties": {
1102                    "memory_id": {
1103                        "type": "string",
1104                        "description": "MemoryId to inspect (UUID v7).",
1105                    },
1106                },
1107                "required": ["memory_id"],
1108            })),
1109        ),
1110        Tool::new(
1111            "memory_update",
1112            "Correct one active saved memory and refresh its embedding \
1113             and search index entry. Use when the user says a remembered \
1114             episode is wrong or outdated and provides the corrected \
1115             wording. Returns the updated memory id, rowid, content, and \
1116             timestamp.",
1117            json_schema_object(serde_json::json!({
1118                "type": "object",
1119                "properties": {
1120                    "memory_id": {
1121                        "type": "string",
1122                        "description": "MemoryId to update (UUID v7).",
1123                    },
1124                    "content": {
1125                        "type": "string",
1126                        "description": "Replacement content for the active memory.",
1127                        "minLength": 1,
1128                    },
1129                },
1130                "required": ["memory_id", "content"],
1131            })),
1132        ),
1133        // Path 1 derived-layer tools (v0.4.0+) — query the Steward's
1134        // outputs. These are populated by `solo consolidate` and were
1135        // previously unreadable except via direct SQL.
1136        Tool::new(
1137            "memory_themes",
1138            "Recent topics the user has been thinking about. Use to \
1139             orient yourself at the start of a conversation, or when \
1140             the user asks \"what have I been up to\" / \"what was I \
1141             working on last week\". Pass `window_days` to scope \
1142             (e.g. 7 for last week); omit for all-time.",
1143            json_schema_object(serde_json::json!({
1144                "type": "object",
1145                "properties": {
1146                    "window_days": {
1147                        "type": "integer",
1148                        "description": "Optional time window in days. Omit for unfiltered.",
1149                        "minimum": 1,
1150                    },
1151                    "limit": {
1152                        "type": "integer",
1153                        "description": "Maximum results (default 5).",
1154                        "minimum": 1,
1155                        "maximum": 100,
1156                    },
1157                },
1158            })),
1159        ),
1160        Tool::new(
1161            "memory_facts_about",
1162            "Look up what you remember about a person, project, or \
1163             topic — names, dates, preferences, relationships. Use \
1164             when the user asks \"what do you know about Alex?\", \
1165             \"when did I start at Quotient?\", \"who is Maya?\", or \
1166             whenever you need grounded facts about someone or \
1167             something before answering. Subject is required (the \
1168             person/place/thing you're asking about); narrow further \
1169             with `predicate` (\"works_at\", \"lives_in\") or a date \
1170             range. Set `include_as_object=true` to also surface \
1171             facts where the subject appears on the receiving side of \
1172             a relationship (e.g. \"Sam pushes back on PRs about \
1173             Maya\" surfaces under facts_about(subject=\"Maya\", \
1174             include_as_object=true)). (Backed by \
1175             subject-predicate-object triples distilled from past \
1176             conversations.) Clients should set a 30s timeout on this \
1177             call; if exceeded, retry once or fall back to \
1178             `memory_recall`.",
1179            json_schema_object(serde_json::json!({
1180                "type": "object",
1181                "properties": {
1182                    "subject": {
1183                        "type": "string",
1184                        "description": "Subject id to query (e.g. 'Sam').",
1185                    },
1186                    "predicate": {
1187                        "type": "string",
1188                        "description": "Optional predicate filter (e.g. 'works_at').",
1189                    },
1190                    "since_ms": {
1191                        "type": "integer",
1192                        "description": "Optional valid_from_ms lower bound (epoch ms).",
1193                    },
1194                    "until_ms": {
1195                        "type": "integer",
1196                        "description": "Optional valid_to_ms upper bound (epoch ms). NULL upper bounds (still-valid facts) pass through.",
1197                    },
1198                    "include_as_object": {
1199                        "type": "boolean",
1200                        "description": "If true, also match facts where `subject` appears as the object (e.g. 'Sam pushes back on PRs about Maya' surfaces under subject='Maya'). Default false.",
1201                        "default": false,
1202                    },
1203                    "limit": {
1204                        "type": "integer",
1205                        "description": "Maximum results (default 5).",
1206                        "minimum": 1,
1207                        "maximum": 100,
1208                    },
1209                },
1210                "required": ["subject"],
1211            })),
1212        ),
1213        Tool::new(
1214            "memory_entities",
1215            "Discover entity ids from the structured-fact graph. Use \
1216             before memory_facts_about when you are not sure how a \
1217             person, project, or topic is keyed in memory, or when the \
1218             user gives a partial name. Returns entity ids with fact \
1219             counts and common predicates.",
1220            json_schema_object(serde_json::json!({
1221                "type": "object",
1222                "properties": {
1223                    "query": {
1224                        "type": "string",
1225                        "description": "Partial or exact entity id to search for.",
1226                        "minLength": 1,
1227                    },
1228                    "limit": {
1229                        "type": "integer",
1230                        "description": "Maximum results (default 5).",
1231                        "minimum": 1,
1232                        "maximum": 100,
1233                    },
1234                },
1235                "required": ["query"],
1236            })),
1237        ),
1238        Tool::new(
1239            "memory_contradictions",
1240            "Find places where the user's stated beliefs or facts \
1241             disagree across conversations — flag disagreements \
1242             before answering. Use whenever you're about to rely on \
1243             a remembered fact that could have changed (jobs, \
1244             relationships, preferences, opinions); a disagreement \
1245             here means the user has told you both X and not-X over \
1246             time and you should ask which is current instead of \
1247             guessing. Each result shows both conflicting statements \
1248             with the topic.",
1249            json_schema_object(serde_json::json!({
1250                "type": "object",
1251                "properties": {
1252                    "limit": {
1253                        "type": "integer",
1254                        "description": "Maximum results (default 5).",
1255                        "minimum": 1,
1256                        "maximum": 100,
1257                    },
1258                },
1259            })),
1260        ),
1261        Tool::new(
1262            "memory_contradiction_resolve",
1263            "Mark one flagged contradiction as resolved, unresolved, \
1264             or reopened. Use after the user clarifies which side is \
1265             current. Pass the a_id, b_id, and kind from \
1266             memory_contradictions; status defaults to resolved.",
1267            json_schema_object(serde_json::json!({
1268                "type": "object",
1269                "properties": {
1270                    "a_id": {
1271                        "type": "string",
1272                        "description": "First contradiction id from memory_contradictions.",
1273                    },
1274                    "b_id": {
1275                        "type": "string",
1276                        "description": "Second contradiction id from memory_contradictions.",
1277                    },
1278                    "kind": {
1279                        "type": "string",
1280                        "description": "Contradiction kind from memory_contradictions.",
1281                    },
1282                    "status": {
1283                        "type": "string",
1284                        "enum": ["unresolved", "resolved", "reopened"],
1285                        "default": "resolved",
1286                        "description": "New lifecycle status.",
1287                    },
1288                    "resolution_note": {
1289                        "type": "string",
1290                        "description": "Optional human-readable clarification.",
1291                    },
1292                    "winning_triple_id": {
1293                        "type": "string",
1294                        "description": "Optional triple id to treat as the current/correct side.",
1295                    },
1296                },
1297                "required": ["a_id", "b_id", "kind"],
1298            })),
1299        ),
1300        Tool::new(
1301            "memory_inspect_cluster",
1302            "Show the raw conversations behind one summary. Returns \
1303             the one-line topic (the LLM-generated summary) and the \
1304             source conversations the topic was built from. Use \
1305             after memory_themes when the user asks \"show me the \
1306             raw context behind this\" or \"why does Solo think \
1307             that about cluster Y\". Source items are truncated to \
1308             200 chars unless `full_content` is set.",
1309            json_schema_object(serde_json::json!({
1310                "type": "object",
1311                "properties": {
1312                    "cluster_id": {
1313                        "type": "string",
1314                        "description": "Cluster id to inspect (from memory_themes hits).",
1315                    },
1316                    "full_content": {
1317                        "type": "boolean",
1318                        "description": "If true, episode content is returned verbatim. Default false (truncate to 200 chars + ellipsis).",
1319                    },
1320                },
1321                "required": ["cluster_id"],
1322            })),
1323        ),
1324        // Document tools (v0.7.0+). RAG over user-supplied files —
1325        // markdown notes, PDFs, runbooks, code, etc. Same vector space
1326        // as episodes; same embedder; same HNSW index.
1327        Tool::new(
1328            "memory_ingest_document",
1329            "Read a file from disk and add it to the user's document \
1330             library so it becomes searchable alongside past \
1331             conversations. Use when the user asks you to remember a \
1332             whole file (\"add my notes/runbook.md\", \"ingest this \
1333             PDF\"). The file is split into ~500-token chunks and \
1334             each chunk is embedded; chunks then surface through \
1335             memory_search_docs. Returns the new document id, chunk \
1336             count, and a `deduped` flag (true if the same content \
1337             was already ingested under another id).",
1338            json_schema_object(serde_json::json!({
1339                "type": "object",
1340                "properties": {
1341                    "path": {
1342                        "type": "string",
1343                        "description": "Server-side absolute path to the file to ingest. The file must be readable by the Solo process.",
1344                    },
1345                },
1346                "required": ["path"],
1347            })),
1348        ),
1349        Tool::new(
1350            "memory_search_docs",
1351            "Search across the user's ingested documents by topic or \
1352             phrase. Returns up to `limit` matching chunks, best \
1353             match first, each with the parent document's title + \
1354             source path so you can cite where the answer came from. \
1355             Use when the user asks a question that hinges on \
1356             material they've added as a file (\"what does my \
1357             runbook say about backups?\", \"find the section in the \
1358             notes about the new policy\"). Forgotten documents are \
1359             skipped.",
1360            json_schema_object(serde_json::json!({
1361                "type": "object",
1362                "properties": {
1363                    "query": {
1364                        "type": "string",
1365                        "description": "The query text.",
1366                    },
1367                    "limit": {
1368                        "type": "integer",
1369                        "description": "Maximum results (default 5).",
1370                        "minimum": 1,
1371                        "maximum": 100,
1372                    },
1373                },
1374                "required": ["query"],
1375            })),
1376        ),
1377        Tool::new(
1378            "memory_inspect_document",
1379            "Show one document's metadata plus a preview of every \
1380             chunk it was split into. Use after memory_search_docs \
1381             when the user wants the bigger picture for one hit \
1382             (\"show me the whole document this came from\"), or \
1383             after memory_list_documents to drill into one entry. \
1384             Each chunk preview is truncated to 200 chars.",
1385            json_schema_object(serde_json::json!({
1386                "type": "object",
1387                "properties": {
1388                    "doc_id": {
1389                        "type": "string",
1390                        "description": "Document id to inspect (UUID v7).",
1391                    },
1392                },
1393                "required": ["doc_id"],
1394            })),
1395        ),
1396        Tool::new(
1397            "memory_list_documents",
1398            "List the user's ingested documents, newest first. Use \
1399             when the user asks \"what documents have I added?\" or \
1400             \"show me my files\". Returns a paginated index — pass \
1401             `offset` to page further back. Forgotten documents are \
1402             hidden by default; set `include_forgotten=true` to see \
1403             them too.",
1404            json_schema_object(serde_json::json!({
1405                "type": "object",
1406                "properties": {
1407                    "limit": {
1408                        "type": "integer",
1409                        "description": "Maximum results per page (default 20).",
1410                        "minimum": 1,
1411                        "maximum": 100,
1412                    },
1413                    "offset": {
1414                        "type": "integer",
1415                        "description": "Number of rows to skip (for paging). Default 0.",
1416                        "minimum": 0,
1417                    },
1418                    "include_forgotten": {
1419                        "type": "boolean",
1420                        "description": "If true, also include documents the user has forgotten. Default false.",
1421                    },
1422                },
1423            })),
1424        ),
1425        Tool::new(
1426            "memory_forget_document",
1427            "Drop one document from the user's library by id. Use \
1428             when the user asks you to forget a specific file \
1429             (\"forget my old runbook\"). The document's chunks stop \
1430             appearing in memory_search_docs and the vectors are \
1431             tombstoned in the index. The chunk rows themselves are \
1432             kept for forensic value (a future restore command can \
1433             undo this).",
1434            json_schema_object(serde_json::json!({
1435                "type": "object",
1436                "properties": {
1437                    "doc_id": {
1438                        "type": "string",
1439                        "description": "Document id to forget (UUID v7).",
1440                    },
1441                },
1442                "required": ["doc_id"],
1443            })),
1444        ),
1445    ]
1446}
1447
1448fn json_schema_object(value: serde_json::Value) -> serde_json::Map<String, serde_json::Value> {
1449    match value {
1450        serde_json::Value::Object(map) => map,
1451        _ => panic!("json_schema_object: input must be an object"),
1452    }
1453}
1454
/// Returns the name of every tool this server exposes, in registration
/// order.
///
/// This is the cheap way for cross-crate consumers (notably `solo doctor
/// --check-mcp-compat`) to get the name list: no `rmcp::Tool` records are
/// built, so no JSON schemas are allocated. The order is meant to mirror
/// `build_tools()`; drift between the two is expected to be caught by the
/// cross-provider regex test, which iterates `build_tools()`.
pub fn tool_names() -> Vec<&'static str> {
    const NAMES: &[&str] = &[
        // Episode tools.
        "memory_remember",
        // v0.9.2 — batched-remember for agentic clients (solo-jarvis).
        "memory_remember_batch",
        "memory_recall",
        "memory_context",
        "memory_forget",
        "memory_inspect",
        "memory_update",
        // Derived-layer tools.
        "memory_themes",
        "memory_facts_about",
        "memory_entities",
        "memory_contradictions",
        "memory_contradiction_resolve",
        "memory_inspect_cluster",
        // Document tools added in v0.7.0:
        "memory_ingest_document",
        "memory_search_docs",
        "memory_inspect_document",
        "memory_list_documents",
        "memory_forget_document",
    ];
    NAMES.to_vec()
}
1487
1488// ---------------------------------------------------------------------------
1489// Tool handlers
1490// ---------------------------------------------------------------------------
1491
1492impl SoloMcpServer {
1493    async fn handle_remember(
1494        &self,
1495        args: RememberArgs,
1496    ) -> std::result::Result<CallToolResult, McpError> {
1497        let content = args.content.trim_end().to_string();
1498        if content.is_empty() {
1499            return Err(McpError::invalid_params(
1500                "memory_remember: content must not be empty".to_string(),
1501                None,
1502            ));
1503        }
1504        validate_salience(args.salience)?;
1505        let embedding: solo_core::Embedding = self
1506            .inner
1507            .tenant
1508            .embedder()
1509            .embed(&content)
1510            .await
1511            .map_err(solo_to_mcp)?;
1512        let episode = Episode {
1513            memory_id: MemoryId::new(),
1514            ts_ms: chrono::Utc::now().timestamp_millis(),
1515            source_type: args.source_type.unwrap_or_else(|| "user_message".into()),
1516            source_id: args.source_id,
1517            content,
1518            encoding_context: EncodingContext::default(),
1519            provenance: None,
1520            confidence: Confidence::new(0.9).unwrap(),
1521            strength: 0.5,
1522            // v0.9.2: caller-supplied salience overrides the default. The
1523            // `validate_salience` call above has already rejected NaN /
1524            // out-of-range values.
1525            salience: args.salience.unwrap_or(0.5),
1526            tier: Tier::Hot,
1527        };
1528        let mid = self
1529            .inner
1530            .tenant
1531            .write()
1532            .remember_as(self.inner.audit_principal.clone(), episode, embedding)
1533            .await
1534            .map_err(solo_to_mcp)?;
1535        Ok(CallToolResult::success(vec![Content::text(format!(
1536            "remembered {mid}"
1537        ))]))
1538    }
1539
1540    /// v0.9.2 — handler for `memory_remember_batch`.
1541    ///
1542    /// Pipeline (mirrors `handle_remember` over N items):
1543    ///   1. Validate batch (non-empty, ≤ `MAX_REMEMBER_BATCH_SIZE`,
1544    ///      per-item content non-empty, per-item salience in [0.0, 1.0]).
1545    ///   2. Embed all items sequentially via the tenant's embedder.
1546    ///      We don't `join_all` here because the in-process embedder
1547    ///      paths today (stub, local-Anthropic, OpenAI) are individually
1548    ///      fast and serial is robust against rate-limit surprises (per
1549    ///      dev-log 0120 §8 R2 mitigation: existing embedder
1550    ///      throttling guards parallel fan-out; serial gives identical
1551    ///      semantics with simpler error paths). Parallel fan-out is a
1552    ///      v0.9.3 optimization once the batch tool has live traffic.
1553    ///   3. Build `Vec<(Episode, Embedding)>` with default Confidence /
1554    ///      strength / tier — same shape as single-Remember.
1555    ///   4. Dispatch via `WriteHandle::remember_batch_as`, which wraps
1556    ///      every INSERT in ONE `BEGIN IMMEDIATE` tx (ADR-0003 invariant
1557    ///      preserved).
1558    ///   5. Reply is `Vec<MemoryId>` in input order; serialise to JSON.
1559    async fn handle_remember_batch(
1560        &self,
1561        args: RememberBatchArgs,
1562        progress: Option<crate::mcp_progress::ProgressReporter>,
1563    ) -> std::result::Result<CallToolResult, McpError> {
1564        // 1. Batch-shape validation. The writer-actor will re-check
1565        //    `MAX_REMEMBER_BATCH_SIZE` (dev-log 0120 §3 Decision F) and
1566        //    reject with `InvalidInput` — we mirror the check here to
1567        //    avoid the round-trip into the writer + the embedder calls
1568        //    when the request is obviously over-cap.
1569        if args.items.is_empty() {
1570            return Err(McpError::invalid_params(
1571                "memory_remember_batch: items must not be empty".to_string(),
1572                None,
1573            ));
1574        }
1575        if args.items.len() > solo_storage::MAX_REMEMBER_BATCH_SIZE {
1576            return Err(McpError::invalid_params(
1577                format!(
1578                    "memory_remember_batch: {} items exceeds MAX_REMEMBER_BATCH_SIZE = {}",
1579                    args.items.len(),
1580                    solo_storage::MAX_REMEMBER_BATCH_SIZE,
1581                ),
1582                None,
1583            ));
1584        }
1585        for (i, item) in args.items.iter().enumerate() {
1586            if item.content.trim_end().is_empty() {
1587                return Err(McpError::invalid_params(
1588                    format!("memory_remember_batch: items[{i}].content must not be empty"),
1589                    None,
1590                ));
1591            }
1592            validate_salience(item.salience).map_err(|e| {
1593                // Re-wrap with the index so the caller can pinpoint
1594                // which item tripped the validator.
1595                McpError::invalid_params(
1596                    format!("memory_remember_batch: items[{i}].{}", e.message),
1597                    None,
1598                )
1599            })?;
1600        }
1601
1602        // v0.11.0 P3: progress emission is gated on batch size — below
1603        // the threshold (50 items) the wire-overhead of progress
1604        // notifications outweighs the UX benefit. Above threshold +
1605        // client opted in (`reporter.is_some()`), emit one event per
1606        // `MCP_REMEMBER_BATCH_PROGRESS_EMIT_EVERY` items during the
1607        // embed loop + one terminal "embedded" + one "inserted" event.
1608        let total = args.items.len() as u64;
1609        let progress_active = progress.is_some()
1610            && args.items.len() > crate::mcp_progress::MCP_REMEMBER_BATCH_PROGRESS_ITEM_THRESHOLD;
1611        let progress_reporter = if progress_active {
1612            progress.as_ref()
1613        } else {
1614            None
1615        };
1616
1617        // 2. Embed each item. Serial fan-out (see doc comment above).
1618        let embedder = self.inner.tenant.embedder();
1619        let now_ms = chrono::Utc::now().timestamp_millis();
1620        let mut pairs: Vec<(Episode, solo_core::Embedding)> = Vec::with_capacity(args.items.len());
1621        for (i, item) in args.items.into_iter().enumerate() {
1622            let content = item.content.trim_end().to_string();
1623            let embedding = embedder.embed(&content).await.map_err(solo_to_mcp)?;
1624            let episode = Episode {
1625                memory_id: MemoryId::new(),
1626                ts_ms: now_ms,
1627                source_type: item.source_type.unwrap_or_else(|| "user_message".into()),
1628                source_id: item.source_id,
1629                content,
1630                encoding_context: EncodingContext::default(),
1631                provenance: None,
1632                confidence: Confidence::new(0.9).unwrap(),
1633                strength: 0.5,
1634                salience: item.salience.unwrap_or(0.5),
1635                tier: Tier::Hot,
1636            };
1637            pairs.push((episode, embedding));
1638            // v0.11.0 P3 checkpoint A — embed progress, every N items.
1639            // `(i + 1) % EMIT_EVERY == 0` emits at items 25, 50, 75, ...
1640            // The terminal "embedded" event below covers any remainder.
1641            let done = (i + 1) as u64;
1642            if (i + 1) % crate::mcp_progress::MCP_REMEMBER_BATCH_PROGRESS_EMIT_EVERY == 0 {
1643                crate::mcp_progress::report_if_some(
1644                    progress_reporter,
1645                    done,
1646                    Some(total),
1647                    Some("embedding"),
1648                );
1649            }
1650        }
1651
1652        // v0.11.0 P3 checkpoint B — all items embedded; about to land
1653        // in writer-actor. Always-emitted (when progress_active) so a
1654        // batch that wasn't a multiple of EMIT_EVERY still gets a
1655        // final embed-phase event.
1656        crate::mcp_progress::report_if_some(
1657            progress_reporter,
1658            total,
1659            Some(total),
1660            Some("embedded"),
1661        );
1662
1663        // 3. Dispatch into the writer-actor. The batch lands as one tx.
1664        let memory_ids = self
1665            .inner
1666            .tenant
1667            .write()
1668            .remember_batch_as(self.inner.audit_principal.clone(), pairs)
1669            .await
1670            .map_err(solo_to_mcp)?;
1671
1672        // v0.11.0 P3 checkpoint C — writer-actor committed. The reply
1673        // body below also lands in the POST response, but this event
1674        // gives a client subscribed to the GET stream early confirmation
1675        // that the row is committed without waiting for the POST to
1676        // return (network buffering can stall the POST response
1677        // marginally; the SSE event is immediate).
1678        crate::mcp_progress::report_if_some(
1679            progress_reporter,
1680            total,
1681            Some(total),
1682            Some("inserted"),
1683        );
1684
1685        // 4. Reply: JSON-serialised array of memory ids in input order.
1686        //    Stringified so MCP clients see UUID strings (matches single
1687        //    `memory_remember`'s reply shape — both speak strings on
1688        //    the wire).
1689        let ids_as_strings: Vec<String> = memory_ids.iter().map(|m| m.to_string()).collect();
1690        let body = serde_json::to_string(&ids_as_strings)
1691            .map_err(|e| McpError::internal_error(format!("serialize batch reply: {e}"), None))?;
1692        Ok(CallToolResult::success(vec![Content::text(body)]))
1693    }
1694
1695    async fn handle_recall(
1696        &self,
1697        args: RecallArgs,
1698    ) -> std::result::Result<CallToolResult, McpError> {
1699        // Pipeline lives in solo-query; the transport just formats the
1700        // result. solo_query::run_recall validates empty queries
1701        // (returns InvalidInput → invalid_params via solo_to_mcp).
1702        let result = solo_query::run_recall(
1703            self.inner.tenant.as_ref(),
1704            self.inner.audit_principal.clone(),
1705            &args.query,
1706            args.limit,
1707        )
1708        .await
1709        .map_err(solo_to_mcp)?;
1710
1711        if result.hits.is_empty() {
1712            return Ok(CallToolResult::success(vec![Content::text(format!(
1713                "no matches (index has {} vectors)",
1714                result.index_len
1715            ))]));
1716        }
1717        let body = serde_json::to_string_pretty(&result.hits).unwrap_or_else(|_| String::new());
1718        Ok(CallToolResult::success(vec![Content::text(body)]))
1719    }
1720
1721    async fn handle_memory_context(
1722        &self,
1723        args: MemoryContextArgs,
1724    ) -> std::result::Result<CallToolResult, McpError> {
1725        let result = solo_query::memory_context(
1726            self.inner.tenant.as_ref(),
1727            self.inner.audit_principal.clone(),
1728            &args.query,
1729            args.subject.as_deref(),
1730            &self.inner.user_aliases,
1731            args.window_days,
1732            args.limit,
1733        )
1734        .await
1735        .map_err(solo_to_mcp)?;
1736        let body = serde_json::to_string_pretty(&result).unwrap_or_else(|_| String::new());
1737        Ok(CallToolResult::success(vec![Content::text(body)]))
1738    }
1739
1740    async fn handle_forget(
1741        &self,
1742        args: ForgetArgs,
1743    ) -> std::result::Result<CallToolResult, McpError> {
1744        let mid = MemoryId::from_str(&args.memory_id)
1745            .map_err(|e| McpError::invalid_params(format!("invalid memory_id: {e}"), None))?;
1746        self.inner
1747            .tenant
1748            .write()
1749            .forget_as(self.inner.audit_principal.clone(), mid, args.reason)
1750            .await
1751            .map_err(solo_to_mcp)?;
1752        Ok(CallToolResult::success(vec![Content::text(format!(
1753            "forgotten {mid}"
1754        ))]))
1755    }
1756
1757    async fn handle_inspect(
1758        &self,
1759        args: InspectArgs,
1760    ) -> std::result::Result<CallToolResult, McpError> {
1761        let mid = MemoryId::from_str(&args.memory_id)
1762            .map_err(|e| McpError::invalid_params(format!("invalid memory_id: {e}"), None))?;
1763        // Pipeline lives in solo-query::inspect; transports just format.
1764        let row = solo_query::inspect_one(
1765            self.inner.tenant.read(),
1766            self.inner.tenant.audit(),
1767            self.inner.audit_principal.clone(),
1768            mid,
1769        )
1770        .await
1771        .map_err(solo_to_mcp)?;
1772        let body = serde_json::to_string_pretty(&row).unwrap_or_else(|_| String::new());
1773        Ok(CallToolResult::success(vec![Content::text(body)]))
1774    }
1775
1776    async fn handle_update(
1777        &self,
1778        args: UpdateArgs,
1779    ) -> std::result::Result<CallToolResult, McpError> {
1780        let mid = MemoryId::from_str(&args.memory_id)
1781            .map_err(|e| McpError::invalid_params(format!("invalid memory_id: {e}"), None))?;
1782        if args.content.trim().is_empty() {
1783            return Err(McpError::invalid_params(
1784                "memory_update: content must not be empty".to_string(),
1785                None,
1786            ));
1787        }
1788        let result = solo_query::memory_update(
1789            self.inner.tenant.as_ref(),
1790            self.inner.audit_principal.clone(),
1791            mid,
1792            &args.content,
1793        )
1794        .await
1795        .map_err(solo_to_mcp)?;
1796        let body = serde_json::to_string_pretty(&result).unwrap_or_else(|_| String::new());
1797        Ok(CallToolResult::success(vec![Content::text(body)]))
1798    }
1799
1800    // Path 1 derived-layer handlers (v0.4.0+). Each one delegates to a
1801    // single solo-query::derived pipeline and serialises the result Vec
1802    // to pretty JSON for the MCP wire. Empty result → JSON empty array
1803    // `[]` (not a special-case "no matches" string) so MCP clients can
1804    // parse uniformly.
1805
1806    async fn handle_themes(
1807        &self,
1808        args: ThemesArgs,
1809    ) -> std::result::Result<CallToolResult, McpError> {
1810        let hits = solo_query::themes(
1811            self.inner.tenant.read(),
1812            self.inner.tenant.audit(),
1813            self.inner.audit_principal.clone(),
1814            args.window_days,
1815            args.limit,
1816        )
1817        .await
1818        .map_err(solo_to_mcp)?;
1819        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1820        Ok(CallToolResult::success(vec![Content::text(body)]))
1821    }
1822
1823    async fn handle_facts_about(
1824        &self,
1825        args: FactsAboutArgs,
1826    ) -> std::result::Result<CallToolResult, McpError> {
1827        if args.subject.trim().is_empty() {
1828            return Err(McpError::invalid_params(
1829                "memory_facts_about: subject must not be empty".to_string(),
1830                None,
1831            ));
1832        }
1833        let hits = solo_query::facts_about(
1834            self.inner.tenant.read(),
1835            self.inner.tenant.audit(),
1836            self.inner.audit_principal.clone(),
1837            &args.subject,
1838            &self.inner.user_aliases,
1839            args.include_as_object,
1840            args.predicate.as_deref(),
1841            args.since_ms,
1842            args.until_ms,
1843            args.limit,
1844        )
1845        .await
1846        .map_err(solo_to_mcp)?;
1847        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1848        Ok(CallToolResult::success(vec![Content::text(body)]))
1849    }
1850
1851    async fn handle_entities(
1852        &self,
1853        args: EntitiesArgs,
1854    ) -> std::result::Result<CallToolResult, McpError> {
1855        if args.query.trim().is_empty() {
1856            return Err(McpError::invalid_params(
1857                "memory_entities: query must not be empty".to_string(),
1858                None,
1859            ));
1860        }
1861        let hits = solo_query::entities(
1862            self.inner.tenant.read(),
1863            self.inner.tenant.audit(),
1864            self.inner.audit_principal.clone(),
1865            &args.query,
1866            args.limit,
1867        )
1868        .await
1869        .map_err(solo_to_mcp)?;
1870        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1871        Ok(CallToolResult::success(vec![Content::text(body)]))
1872    }
1873
1874    async fn handle_contradictions(
1875        &self,
1876        args: ContradictionsArgs,
1877    ) -> std::result::Result<CallToolResult, McpError> {
1878        let hits = solo_query::contradictions(
1879            self.inner.tenant.read(),
1880            self.inner.tenant.audit(),
1881            self.inner.audit_principal.clone(),
1882            args.limit,
1883        )
1884        .await
1885        .map_err(solo_to_mcp)?;
1886        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1887        Ok(CallToolResult::success(vec![Content::text(body)]))
1888    }
1889
1890    async fn handle_contradiction_resolve(
1891        &self,
1892        args: ContradictionResolveArgs,
1893    ) -> std::result::Result<CallToolResult, McpError> {
1894        if args.a_id.trim().is_empty() || args.b_id.trim().is_empty() || args.kind.trim().is_empty()
1895        {
1896            return Err(McpError::invalid_params(
1897                "memory_contradiction_resolve: a_id, b_id, and kind must not be empty".to_string(),
1898                None,
1899            ));
1900        }
1901        let result = solo_query::resolve_contradiction(
1902            self.inner.tenant.read(),
1903            self.inner.tenant.audit(),
1904            self.inner.audit_principal.clone(),
1905            &args.a_id,
1906            &args.b_id,
1907            &args.kind,
1908            &args.status,
1909            args.resolution_note.as_deref(),
1910            args.winning_triple_id.as_deref(),
1911        )
1912        .await
1913        .map_err(solo_to_mcp)?;
1914        let body = serde_json::to_string_pretty(&result).unwrap_or_else(|_| String::new());
1915        Ok(CallToolResult::success(vec![Content::text(body)]))
1916    }
1917
1918    async fn handle_inspect_cluster(
1919        &self,
1920        args: InspectClusterArgs,
1921    ) -> std::result::Result<CallToolResult, McpError> {
1922        if args.cluster_id.trim().is_empty() {
1923            return Err(McpError::invalid_params(
1924                "memory_inspect_cluster: cluster_id must not be empty".to_string(),
1925                None,
1926            ));
1927        }
1928        // `solo_to_mcp` maps `Error::NotFound` → `invalid_params` for
1929        // MCP (the protocol does not have a separate "not found" error
1930        // shape; clients see the message verbatim, which includes the
1931        // cluster_id).
1932        let record = solo_query::inspect_cluster(
1933            self.inner.tenant.read(),
1934            self.inner.tenant.audit(),
1935            self.inner.audit_principal.clone(),
1936            &args.cluster_id,
1937            args.full_content,
1938        )
1939        .await
1940        .map_err(solo_to_mcp)?;
1941        let body = serde_json::to_string_pretty(&record).unwrap_or_else(|_| String::new());
1942        Ok(CallToolResult::success(vec![Content::text(body)]))
1943    }
1944
1945    // Document handlers (v0.7.0+). Each wraps the corresponding writer
1946    // / query API; the MCP wire shape is plain JSON serialisation of
1947    // the returned report / records.
1948
1949    async fn handle_ingest_document(
1950        &self,
1951        args: IngestDocumentArgs,
1952        progress: Option<crate::mcp_progress::ProgressReporter>,
1953    ) -> std::result::Result<CallToolResult, McpError> {
1954        if args.path.trim().is_empty() {
1955            return Err(McpError::invalid_params(
1956                "memory_ingest_document: path must not be empty".to_string(),
1957                None,
1958            ));
1959        }
1960        let path = std::path::PathBuf::from(args.path);
1961        // Defaults match what the daemon uses today (target 500 tokens,
1962        // 50-token overlap). Future: thread a per-call override through
1963        // the args struct if a use case appears.
1964        let chunk_config = solo_storage::document::ChunkConfig::default();
1965
1966        // v0.11.0 P3: ingest checkpoints. The writer-actor's
1967        // `ingest_document_as` is one opaque command that internally
1968        // performs parse → chunk → embed → SQL insert; we bookend it
1969        // with phase-marker progress events. The 4-phase taxonomy
1970        // matches the MCP spec brief — `total=4`, `progress` walks 1
1971        // → 4 — even though phases 1 and 2 (parse, chunk) emit before
1972        // the writer call and 3 and 4 (embed, insert) emit after.
1973        // Real chunk-by-chunk progress would require redesigning the
1974        // writer command shape (cross-cuts ADR-0003); P3's bookend
1975        // pattern stays additive without touching the writer.
1976        const INGEST_TOTAL_PHASES: u64 = 4;
1977        crate::mcp_progress::report_if_some(
1978            progress.as_ref(),
1979            1,
1980            Some(INGEST_TOTAL_PHASES),
1981            Some("parsed"),
1982        );
1983        crate::mcp_progress::report_if_some(
1984            progress.as_ref(),
1985            2,
1986            Some(INGEST_TOTAL_PHASES),
1987            Some("chunked"),
1988        );
1989
1990        let report = self
1991            .inner
1992            .tenant
1993            .write()
1994            .ingest_document_as(self.inner.audit_principal.clone(), path, chunk_config)
1995            .await
1996            .map_err(solo_to_mcp)?;
1997
1998        crate::mcp_progress::report_if_some(
1999            progress.as_ref(),
2000            3,
2001            Some(INGEST_TOTAL_PHASES),
2002            Some("embedded"),
2003        );
2004        // Final event includes the real chunk count from the report;
2005        // the per-event `message` field carries it so clients can
2006        // surface "N chunks indexed" without parsing the POST reply
2007        // body.
2008        crate::mcp_progress::report_if_some(
2009            progress.as_ref(),
2010            INGEST_TOTAL_PHASES,
2011            Some(INGEST_TOTAL_PHASES),
2012            Some(&format!("inserted {} chunks", report.chunks_persisted)),
2013        );
2014
2015        let body = serde_json::to_string_pretty(&report).unwrap_or_else(|_| String::new());
2016        Ok(CallToolResult::success(vec![Content::text(body)]))
2017    }
2018
2019    async fn handle_search_docs(
2020        &self,
2021        args: SearchDocsArgs,
2022        progress: Option<crate::mcp_progress::ProgressReporter>,
2023    ) -> std::result::Result<CallToolResult, McpError> {
2024        // v0.11.0 P3: progress emission for search is gated on `top_k`
2025        // (passed via `args.limit`) — below 100 the search completes
2026        // fast enough that progress notifications add wire-overhead
2027        // with no UX benefit (Decision C). Above threshold + client
2028        // opted in, emit 3 phase-marker events around the query call.
2029        let top_k = args.limit as u32;
2030        let progress_active = progress.is_some()
2031            && top_k > crate::mcp_progress::MCP_SEARCH_DOCS_PROGRESS_TOP_K_THRESHOLD;
2032        let progress_reporter = if progress_active {
2033            progress.as_ref()
2034        } else {
2035            None
2036        };
2037        const SEARCH_TOTAL_PHASES: u64 = 3;
2038        crate::mcp_progress::report_if_some(
2039            progress_reporter,
2040            1,
2041            Some(SEARCH_TOTAL_PHASES),
2042            Some("hnsw_lookup"),
2043        );
2044
2045        // `solo_query::run_doc_search` validates empty queries (returns
2046        // InvalidInput → invalid_params via solo_to_mcp) and clamps
2047        // limit upstream of the embedder call.
2048        let hits = solo_query::run_doc_search(
2049            self.inner.tenant.as_ref(),
2050            self.inner.audit_principal.clone(),
2051            &args.query,
2052            args.limit,
2053        )
2054        .await
2055        .map_err(solo_to_mcp)?;
2056
2057        crate::mcp_progress::report_if_some(
2058            progress_reporter,
2059            2,
2060            Some(SEARCH_TOTAL_PHASES),
2061            Some("reranked"),
2062        );
2063        crate::mcp_progress::report_if_some(
2064            progress_reporter,
2065            SEARCH_TOTAL_PHASES,
2066            Some(SEARCH_TOTAL_PHASES),
2067            Some(&format!("returning {} hits", hits.len())),
2068        );
2069
2070        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
2071        Ok(CallToolResult::success(vec![Content::text(body)]))
2072    }
2073
2074    async fn handle_inspect_document(
2075        &self,
2076        args: InspectDocumentArgs,
2077    ) -> std::result::Result<CallToolResult, McpError> {
2078        let doc_id = DocumentId::from_str(&args.doc_id)
2079            .map_err(|e| McpError::invalid_params(format!("invalid doc_id: {e}"), None))?;
2080        let result_opt = solo_query::inspect_document(
2081            self.inner.tenant.read(),
2082            self.inner.tenant.audit(),
2083            self.inner.audit_principal.clone(),
2084            &doc_id,
2085        )
2086        .await
2087        .map_err(solo_to_mcp)?;
2088        match result_opt {
2089            Some(record) => {
2090                let body = serde_json::to_string_pretty(&record).unwrap_or_else(|_| String::new());
2091                Ok(CallToolResult::success(vec![Content::text(body)]))
2092            }
2093            None => Err(McpError::invalid_params(
2094                format!("document {doc_id} not found"),
2095                None,
2096            )),
2097        }
2098    }
2099
2100    async fn handle_list_documents(
2101        &self,
2102        args: ListDocumentsArgs,
2103    ) -> std::result::Result<CallToolResult, McpError> {
2104        let rows = solo_query::list_documents(
2105            self.inner.tenant.read(),
2106            self.inner.tenant.audit(),
2107            self.inner.audit_principal.clone(),
2108            args.limit,
2109            args.offset,
2110            args.include_forgotten,
2111        )
2112        .await
2113        .map_err(solo_to_mcp)?;
2114        let body = serde_json::to_string_pretty(&rows).unwrap_or_else(|_| String::new());
2115        Ok(CallToolResult::success(vec![Content::text(body)]))
2116    }
2117
2118    async fn handle_forget_document(
2119        &self,
2120        args: ForgetDocumentArgs,
2121    ) -> std::result::Result<CallToolResult, McpError> {
2122        let doc_id = DocumentId::from_str(&args.doc_id)
2123            .map_err(|e| McpError::invalid_params(format!("invalid doc_id: {e}"), None))?;
2124        let report = self
2125            .inner
2126            .tenant
2127            .write()
2128            .forget_document_as(self.inner.audit_principal.clone(), doc_id)
2129            .await
2130            .map_err(solo_to_mcp)?;
2131        let body = serde_json::to_string_pretty(&report).unwrap_or_else(|_| String::new());
2132        Ok(CallToolResult::success(vec![Content::text(body)]))
2133    }
2134}
2135
2136#[cfg(test)]
2137mod dispatch_tests {
2138    //! In-process integration tests for the MCP tool surface. We invoke
2139    //! `SoloMcpServer::dispatch_tool` directly (bypasses the rmcp
2140    //! protocol framing + `RequestContext`, which requires a `Peer`
2141    //! that's not constructible outside rmcp internals). The server is
2142    //! constructed against a real WriterActor + ReaderPool +
2143    //! StubEmbedder + StubVectorIndex from `solo_storage::test_support`.
2144    //!
2145    //! Tests live inline in this module rather than `tests/` because an
2146    //! external integration-test exe in `target/debug/deps/mcp_dispatch-*`
2147    //! tripped Windows UAC ERROR_ELEVATION_REQUIRED on the dev machine.
2148    //! The lib test binary doesn't have that issue.
2149    use super::*;
2150    use serde_json::json;
2151    use solo_core::VectorIndex;
2152    use solo_storage::test_support::StubVectorIndex;
2153    use solo_storage::{
2154        EmbedderConfig, IdentityConfig, KeyMaterial, ReaderPool, SoloConfig, StubEmbedder,
2155        TenantHandle, TenantRegistry, WriterActor, WriterSpawn,
2156    };
2157    use std::sync::Arc as StdArc;
2158
2159    fn fake_config(dim: u32) -> SoloConfig {
2160        SoloConfig {
2161            schema_version: 1,
2162            salt_hex: "00000000000000000000000000000000".to_string(),
2163            embedder: EmbedderConfig {
2164                name: "stub".to_string(),
2165                version: "v1".to_string(),
2166                dim,
2167                dtype: "f32".to_string(),
2168            },
2169            identity: IdentityConfig::default(),
2170            documents: solo_storage::DocumentConfig::default(),
2171            auth: None,
2172            audit: solo_storage::AuditSettings::default(),
2173            redaction: solo_storage::RedactionConfig::default(),
2174            llm: None,
2175            triples: solo_storage::TriplesConfig::default(),
2176            sampling: solo_storage::SamplingConfig::default(),
2177            steward: solo_storage::StewardSettings::default(),
2178        }
2179    }
2180
2181    struct Harness {
2182        server: SoloMcpServer,
2183        _tmp: tempfile::TempDir,
2184        db_path: std::path::PathBuf,
2185        write_handle_extra: Option<solo_storage::WriteHandle>,
2186        join: Option<std::thread::JoinHandle<()>>,
2187    }
2188
2189    impl Harness {
2190        fn new(runtime: &tokio::runtime::Runtime) -> Self {
2191            use solo_storage::embedder_registry::{EmbedderIdentity, get_or_insert_embedder_id};
2192
2193            let tmp = tempfile::TempDir::new().unwrap();
2194            let dim = 16usize;
2195            let hnsw: StdArc<dyn VectorIndex + Send + Sync> =
2196                StdArc::new(StubVectorIndex::new(dim));
2197            let embedder: StdArc<dyn solo_core::Embedder> =
2198                StdArc::new(StubEmbedder::new("stub", "v1", dim));
2199
2200            let conn = solo_storage::test_support::open_test_db_at(&tmp.path().join("test.db"));
2201            let embedder_id = get_or_insert_embedder_id(
2202                &conn,
2203                &EmbedderIdentity {
2204                    name: "stub".into(),
2205                    version: "v1".into(),
2206                    dim: dim as u32,
2207                    dtype: "f32".into(),
2208                },
2209            )
2210            .expect("register stub embedder");
2211            let WriterSpawn { handle, join } =
2212                WriterActor::spawn_full(conn, hnsw.clone(), tmp.path().to_path_buf(), embedder_id);
2213
2214            // ReaderPool's deadpool::Pool needs a live tokio runtime for
2215            // both build + drop; build inside block_on.
2216            let path = tmp.path().join("test.db");
2217            let pool: ReaderPool =
2218                runtime.block_on(async { ReaderPool::new(&path, None, hnsw.clone()).unwrap() });
2219
2220            let tenant_id = solo_core::TenantId::default_tenant();
2221            let tenant_handle = StdArc::new(TenantHandle::from_parts_for_tests(
2222                tenant_id.clone(),
2223                fake_config(dim as u32),
2224                path.clone(),
2225                tmp.path().to_path_buf(),
2226                embedder_id,
2227                hnsw,
2228                embedder.clone(),
2229                handle.clone(),
2230                std::thread::spawn(|| {}),
2231                pool,
2232            ));
2233            let key = KeyMaterial::from_bytes_for_tests([0u8; 32]);
2234            let registry = StdArc::new(TenantRegistry::for_tests_with_single_tenant(
2235                tmp.path().to_path_buf(),
2236                key,
2237                embedder,
2238                tenant_handle.clone(),
2239            ));
2240            let server = SoloMcpServer::new_for_tenant(registry, tenant_handle, Vec::new());
2241            Harness {
2242                server,
2243                _tmp: tmp,
2244                db_path: path,
2245                write_handle_extra: Some(handle),
2246                join: Some(join),
2247            }
2248        }
2249
2250        fn open_db(&self) -> rusqlite::Connection {
2251            solo_storage::test_support::open_test_db_at(&self.db_path)
2252        }
2253
2254        fn shutdown(mut self, runtime: &tokio::runtime::Runtime) {
2255            // The whole shutdown runs inside block_on so deadpool-sqlite's
2256            // drop (which schedules cleanup on the active runtime) sees a
2257            // live reactor. Without this, dropping the SoloMcpServer
2258            // (which holds the ReaderPool through its Arc<Inner>) panics
2259            // with "no reactor running".
2260            let join = self.join.take();
2261            let extra = self.write_handle_extra.take();
2262            runtime.block_on(async move {
2263                drop(extra);
2264                drop(self.server);
2265                drop(self._tmp);
2266                if let Some(join) = join {
2267                    let (tx, rx) = std::sync::mpsc::channel();
2268                    std::thread::spawn(move || {
2269                        let _ = tx.send(join.join());
2270                    });
2271                    tokio::task::spawn_blocking(move || {
2272                        rx.recv_timeout(std::time::Duration::from_secs(5))
2273                    })
2274                    .await
2275                    .expect("blocking task")
2276                    .expect("writer thread did not exit within 5s")
2277                    .expect("writer thread panicked");
2278                }
2279            });
2280        }
2281    }
2282
2283    fn rt() -> tokio::runtime::Runtime {
2284        tokio::runtime::Builder::new_multi_thread()
2285            .worker_threads(2)
2286            .enable_all()
2287            .build()
2288            .unwrap()
2289    }
2290
2291    /// Pull the first Content::text body out of a CallToolResult. Use
2292    /// serde_json roundtrip as a robust extractor — `Content`'s public
2293    /// API doesn't directly expose the inner text without going through
2294    /// pattern-matching on RawContent.
2295    fn first_text(r: &rmcp::model::CallToolResult) -> String {
2296        let first = r.content.first().expect("at least one content item");
2297        let v = serde_json::to_value(first).expect("content serialises");
2298        v.get("text")
2299            .and_then(|t| t.as_str())
2300            .map(|s| s.to_string())
2301            .unwrap_or_else(|| format!("{v}"))
2302    }
2303
2304    fn seed_episode(conn: &rusqlite::Connection, content: &str) -> (MemoryId, i64) {
2305        let memory_id = MemoryId::new();
2306        conn.execute(
2307            "INSERT INTO episodes
2308                (memory_id, ts_ms, source_type, content, confidence, strength,
2309                 salience, tier, status, created_at_ms, updated_at_ms)
2310             VALUES (?1, 0, 'test', ?2, 0.9, 0.5, 0.5, 'hot', 'active', 0, 0)",
2311            rusqlite::params![memory_id.to_string(), content],
2312        )
2313        .expect("seed episode");
2314        (memory_id, conn.last_insert_rowid())
2315    }
2316
2317    fn seed_triple_row(
2318        conn: &rusqlite::Connection,
2319        triple_id: &str,
2320        subject: &str,
2321        predicate: &str,
2322        object: &str,
2323        source_episode_rowid: Option<i64>,
2324    ) {
2325        conn.execute(
2326            "INSERT INTO triples
2327                 (triple_id, subject_id, predicate, object_id, object_kind,
2328                  valid_from_ms, valid_to_ms, confidence, provenance_json,
2329                  status, created_at_ms, updated_at_ms, source_episode_id)
2330                 VALUES (?1, ?2, ?3, ?4, 'literal', 0, NULL, 0.9, '{}',
2331                         'active', 0, 0, ?5)",
2332            rusqlite::params![triple_id, subject, predicate, object, source_episode_rowid],
2333        )
2334        .expect("seed triple");
2335    }
2336
2337    fn seed_contradiction_row(conn: &rusqlite::Connection, a_id: &str, b_id: &str, kind: &str) {
2338        conn.execute(
2339            "INSERT INTO contradictions
2340                 (a_memory_id, b_memory_id, kind, explanation, detected_at_ms,
2341                  status, resolved_at_ms, resolution_note, winning_triple_id)
2342                 VALUES (?1, ?2, ?3, 'test contradiction', 0,
2343                         'unresolved', NULL, NULL, NULL)",
2344            rusqlite::params![a_id, b_id, kind],
2345        )
2346        .expect("seed contradiction");
2347    }
2348
2349    #[test]
2350    fn tools_list_returns_eighteen_canonical_tools() {
2351        let runtime = rt();
2352        let h = Harness::new(&runtime);
2353        let tools = h.server.dispatch_list_tools();
2354        let names: Vec<&str> = tools.iter().map(|t| t.name.as_ref()).collect();
2355        assert_eq!(
2356            names,
2357            vec![
2358                "memory_remember",
2359                // v0.9.2 — batched-remember for agentic clients.
2360                "memory_remember_batch",
2361                "memory_recall",
2362                "memory_context",
2363                "memory_forget",
2364                "memory_inspect",
2365                "memory_update",
2366                // Derived-layer tools added in v0.4.0:
2367                "memory_themes",
2368                "memory_facts_about",
2369                "memory_entities",
2370                "memory_contradictions",
2371                "memory_contradiction_resolve",
2372                // Added in v0.5.0 (Priority 3):
2373                "memory_inspect_cluster",
2374                // Document tools added in v0.7.0:
2375                "memory_ingest_document",
2376                "memory_search_docs",
2377                "memory_inspect_document",
2378                "memory_list_documents",
2379                "memory_forget_document",
2380            ]
2381        );
2382        for t in &tools {
2383            // rmcp 1.x: Tool.description is Option<Cow<'static, str>>.
2384            let desc = t.description.as_deref().unwrap_or("");
2385            assert!(!desc.is_empty(), "{} description empty", t.name);
2386            let _schema = t.schema_as_json_value();
2387            // `required` is intentionally absent on memory_themes +
2388            // memory_contradictions + memory_list_documents (all args
2389            // optional with defaults). memory_facts_about has required
2390            // = ["subject"], etc. We don't assert per-tool 'required'
2391            // shape here; the schema's `properties` field is the more
2392            // important signal and is always present.
2393        }
2394        h.shutdown(&runtime);
2395    }
2396
2397    #[test]
2398    fn themes_returns_json_array_on_empty_db() {
2399        let runtime = rt();
2400        let h = Harness::new(&runtime);
2401        runtime.block_on(async {
2402            let r = h
2403                .server
2404                .dispatch_tool("memory_themes", json!({}), None)
2405                .await
2406                .expect("themes succeeds");
2407            let text = first_text(&r);
2408            // Empty derived layer → empty array JSON. Parses cleanly.
2409            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
2410            assert!(v.is_array(), "expected array, got: {text}");
2411            assert_eq!(v.as_array().unwrap().len(), 0);
2412        });
2413        h.shutdown(&runtime);
2414    }
2415
2416    #[test]
2417    fn themes_passes_through_window_and_limit_args() {
2418        let runtime = rt();
2419        let h = Harness::new(&runtime);
2420        runtime.block_on(async {
2421            // Should not crash with optional + integer args present.
2422            let r = h
2423                .server
2424                .dispatch_tool(
2425                    "memory_themes",
2426                    json!({ "window_days": 7, "limit": 20 }),
2427                    None,
2428                )
2429                .await
2430                .expect("themes with args succeeds");
2431            let text = first_text(&r);
2432            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
2433            assert!(v.is_array());
2434        });
2435        h.shutdown(&runtime);
2436    }
2437
2438    #[test]
2439    fn facts_about_rejects_empty_subject() {
2440        let runtime = rt();
2441        let h = Harness::new(&runtime);
2442        runtime.block_on(async {
2443            let err = h
2444                .server
2445                .dispatch_tool("memory_facts_about", json!({ "subject": "   " }), None)
2446                .await
2447                .expect_err("empty subject must error");
2448            // McpError doesn't expose a clean kind/message accessor; just
2449            // verify the error fires (validation path reached).
2450            let s = format!("{err:?}");
2451            assert!(
2452                s.to_lowercase().contains("subject") || s.to_lowercase().contains("invalid"),
2453                "got: {s}"
2454            );
2455        });
2456        h.shutdown(&runtime);
2457    }
2458
2459    #[test]
2460    fn facts_about_returns_array_for_unknown_subject() {
2461        let runtime = rt();
2462        let h = Harness::new(&runtime);
2463        runtime.block_on(async {
2464            let r = h
2465                .server
2466                .dispatch_tool(
2467                    "memory_facts_about",
2468                    json!({ "subject": "NobodyKnowsThisSubject" }),
2469                    None,
2470                )
2471                .await
2472                .expect("facts_about with unknown subject succeeds");
2473            let text = first_text(&r);
2474            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
2475            assert_eq!(v.as_array().unwrap().len(), 0);
2476        });
2477        h.shutdown(&runtime);
2478    }
2479
2480    #[test]
2481    fn facts_about_accepts_include_as_object_arg() {
2482        // Asserts the v0.5.1 P8 arg is parsed (serde default lets it
2483        // be omitted) and forwarded to the query lib without choking
2484        // the dispatcher. We don't seed triples — what we need to
2485        // verify is that the optional bool flows through. Both with
2486        // and without the arg, dispatch succeeds and returns an
2487        // empty array. (Functional coverage of the object-position
2488        // widening lives in the query-crate tests.)
2489        let runtime = rt();
2490        let h = Harness::new(&runtime);
2491        runtime.block_on(async {
2492            // With include_as_object=true.
2493            let r = h
2494                .server
2495                .dispatch_tool(
2496                    "memory_facts_about",
2497                    json!({ "subject": "Maya", "include_as_object": true }),
2498                    None,
2499                )
2500                .await
2501                .expect("dispatch with include_as_object=true succeeds");
2502            let v: serde_json::Value =
2503                serde_json::from_str(&first_text(&r)).expect("parses as json");
2504            assert_eq!(v.as_array().unwrap().len(), 0);
2505
2506            // Omitted entirely — must default to false (no error).
2507            let r = h
2508                .server
2509                .dispatch_tool("memory_facts_about", json!({ "subject": "Maya" }), None)
2510                .await
2511                .expect("dispatch without include_as_object succeeds (default false)");
2512            let v: serde_json::Value =
2513                serde_json::from_str(&first_text(&r)).expect("parses as json");
2514            assert_eq!(v.as_array().unwrap().len(), 0);
2515        });
2516        h.shutdown(&runtime);
2517    }
2518
2519    #[test]
2520    fn contradictions_returns_json_array_on_empty_db() {
2521        let runtime = rt();
2522        let h = Harness::new(&runtime);
2523        runtime.block_on(async {
2524            let r = h
2525                .server
2526                .dispatch_tool("memory_contradictions", json!({}), None)
2527                .await
2528                .expect("contradictions succeeds");
2529            let text = first_text(&r);
2530            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
2531            assert!(v.is_array());
2532            assert_eq!(v.as_array().unwrap().len(), 0);
2533        });
2534        h.shutdown(&runtime);
2535    }
2536
2537    #[test]
2538    fn entities_returns_matching_graph_entities() {
2539        let runtime = rt();
2540        let h = Harness::new(&runtime);
2541        {
2542            let conn = h.open_db();
2543            let (_memory_id, rowid) = seed_episode(&conn, "Alice graph seed");
2544            seed_triple_row(
2545                &conn,
2546                "t-mcp-entity-1",
2547                "Alice",
2548                "knows",
2549                "Bob",
2550                Some(rowid),
2551            );
2552        }
2553        runtime.block_on(async {
2554            let r = h
2555                .server
2556                .dispatch_tool("memory_entities", json!({ "query": "Ali" }), None)
2557                .await
2558                .expect("entities succeeds");
2559            let v: serde_json::Value =
2560                serde_json::from_str(&first_text(&r)).expect("parses as json");
2561            assert!(
2562                v.as_array()
2563                    .unwrap()
2564                    .iter()
2565                    .any(|row| row.get("entity_id").and_then(|id| id.as_str()) == Some("Alice")),
2566                "expected Alice entity, got {v}"
2567            );
2568        });
2569        h.shutdown(&runtime);
2570    }
2571
2572    #[test]
2573    fn contradiction_resolve_updates_lifecycle() {
2574        let runtime = rt();
2575        let h = Harness::new(&runtime);
2576        {
2577            let conn = h.open_db();
2578            let (_memory_id, rowid) = seed_episode(&conn, "contradiction seed");
2579            seed_triple_row(&conn, "t-mcp-a", "Alice", "likes", "tea", Some(rowid));
2580            seed_triple_row(&conn, "t-mcp-b", "Alice", "likes", "coffee", Some(rowid));
2581            seed_contradiction_row(&conn, "t-mcp-a", "t-mcp-b", "other");
2582        }
2583        runtime.block_on(async {
2584            let r = h
2585                .server
2586                .dispatch_tool(
2587                    "memory_contradiction_resolve",
2588                    json!({
2589                        "a_id": "t-mcp-a",
2590                        "b_id": "t-mcp-b",
2591                        "kind": "other",
2592                        "resolution_note": "tea is current",
2593                        "winning_triple_id": "t-mcp-a"
2594                    }),
2595                    None,
2596                )
2597                .await
2598                .expect("resolve succeeds");
2599            let resolved: serde_json::Value =
2600                serde_json::from_str(&first_text(&r)).expect("parses as json");
2601            assert_eq!(
2602                resolved.get("status").and_then(|v| v.as_str()),
2603                Some("resolved")
2604            );
2605            assert!(
2606                resolved
2607                    .get("resolved_at_ms")
2608                    .and_then(|v| v.as_i64())
2609                    .is_some()
2610            );
2611        });
2612        h.shutdown(&runtime);
2613    }
2614
2615    #[test]
2616    fn remember_then_recall_round_trip() {
2617        let runtime = rt();
2618        let h = Harness::new(&runtime);
2619        // Use &h.server directly (no clone) so the only outstanding
2620        // reference at shutdown time is the harness's own. The clone
2621        // path triggered a 5-second writer-thread timeout because the
2622        // local clone held an Arc<Inner> with its own WriteHandle past
2623        // h.shutdown().
2624        runtime.block_on(async {
2625            let r = h
2626                .server
2627                .dispatch_tool(
2628                    "memory_remember",
2629                    json!({ "content": "the cat sat on the mat" }),
2630                    None,
2631                )
2632                .await
2633                .expect("remember succeeds");
2634            let text = first_text(&r);
2635            assert!(text.starts_with("remembered "), "got: {text}");
2636
2637            let r = h
2638                .server
2639                .dispatch_tool(
2640                    "memory_recall",
2641                    json!({ "query": "the cat sat on the mat", "limit": 5 }),
2642                    None,
2643                )
2644                .await
2645                .expect("recall succeeds");
2646            let text = first_text(&r);
2647            assert!(text.contains("the cat sat on the mat"), "got: {text}");
2648        });
2649        h.shutdown(&runtime);
2650    }
2651
2652    #[test]
2653    fn update_rewrites_memory_content() {
2654        let runtime = rt();
2655        let h = Harness::new(&runtime);
2656        runtime.block_on(async {
2657            let r = h
2658                .server
2659                .dispatch_tool(
2660                    "memory_remember",
2661                    json!({ "content": "old mcp transport memory" }),
2662                    None,
2663                )
2664                .await
2665                .expect("remember succeeds");
2666            let text = first_text(&r);
2667            let mid = text
2668                .strip_prefix("remembered ")
2669                .expect("remembered prefix")
2670                .to_string();
2671
2672            let r = h
2673                .server
2674                .dispatch_tool(
2675                    "memory_update",
2676                    json!({
2677                        "memory_id": mid,
2678                        "content": "new mcp transport memory"
2679                    }),
2680                    None,
2681                )
2682                .await
2683                .expect("update succeeds");
2684            let updated: serde_json::Value =
2685                serde_json::from_str(&first_text(&r)).expect("parses as json");
2686            assert_eq!(
2687                updated.get("content").and_then(|v| v.as_str()),
2688                Some("new mcp transport memory")
2689            );
2690        });
2691        h.shutdown(&runtime);
2692    }
2693
2694    #[test]
2695    fn memory_context_returns_json_bundle() {
2696        let runtime = rt();
2697        let h = Harness::new(&runtime);
2698        runtime.block_on(async {
2699            h.server
2700                .dispatch_tool(
2701                    "memory_remember",
2702                    json!({ "content": "memory context round trip" }),
2703                    None,
2704                )
2705                .await
2706                .expect("remember succeeds");
2707
2708            let r = h
2709                .server
2710                .dispatch_tool(
2711                    "memory_context",
2712                    json!({ "query": "memory context", "limit": 5 }),
2713                    None,
2714                )
2715                .await
2716                .expect("memory_context succeeds");
2717            let text = first_text(&r);
2718            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
2719            assert_eq!(v["query"], "memory context");
2720            assert!(
2721                v["recall"]["hits"]
2722                    .as_array()
2723                    .unwrap()
2724                    .iter()
2725                    .any(|h| h["content"] == "memory context round trip"),
2726                "context recall should include remembered content: {v}"
2727            );
2728            assert!(v["themes"].is_array());
2729            assert!(v["facts"].is_array());
2730            assert!(v["contradictions"].is_array());
2731        });
2732        h.shutdown(&runtime);
2733    }
2734
2735    #[test]
2736    fn forget_excludes_row_from_subsequent_recall() {
2737        let runtime = rt();
2738        let h = Harness::new(&runtime);
2739
2740        runtime.block_on(async {
2741            let r = h
2742                .server
2743                .dispatch_tool(
2744                    "memory_remember",
2745                    json!({ "content": "to be forgotten" }),
2746                    None,
2747                )
2748                .await
2749                .unwrap();
2750            let text = first_text(&r);
2751            let mid = text.strip_prefix("remembered ").unwrap().to_string();
2752
2753            h.server
2754                .dispatch_tool(
2755                    "memory_forget",
2756                    json!({ "memory_id": mid, "reason": "test" }),
2757                    None,
2758                )
2759                .await
2760                .expect("forget succeeds");
2761
2762            let r = h
2763                .server
2764                .dispatch_tool(
2765                    "memory_recall",
2766                    json!({ "query": "to be forgotten", "limit": 5 }),
2767                    None,
2768                )
2769                .await
2770                .unwrap();
2771            let text = first_text(&r);
2772            assert!(
2773                !text.contains(r#""content": "to be forgotten""#),
2774                "forgotten row should be excluded; got: {text}"
2775            );
2776        });
2777        h.shutdown(&runtime);
2778    }
2779
2780    #[test]
2781    fn empty_remember_returns_invalid_params() {
2782        let runtime = rt();
2783        let h = Harness::new(&runtime);
2784        runtime.block_on(async {
2785            let err = h
2786                .server
2787                .dispatch_tool("memory_remember", json!({ "content": "" }), None)
2788                .await
2789                .unwrap_err();
2790            assert!(format!("{err:?}").contains("must not be empty"));
2791        });
2792        h.shutdown(&runtime);
2793    }
2794
2795    #[test]
2796    fn empty_recall_query_returns_invalid_params() {
2797        let runtime = rt();
2798        let h = Harness::new(&runtime);
2799        runtime.block_on(async {
2800            let err = h
2801                .server
2802                .dispatch_tool("memory_recall", json!({ "query": "   " }), None)
2803                .await
2804                .unwrap_err();
2805            assert!(format!("{err:?}").contains("must not be empty"));
2806        });
2807        h.shutdown(&runtime);
2808    }
2809
2810    #[test]
2811    fn inspect_with_invalid_id_returns_invalid_params() {
2812        let runtime = rt();
2813        let h = Harness::new(&runtime);
2814        runtime.block_on(async {
2815            let err = h
2816                .server
2817                .dispatch_tool("memory_inspect", json!({ "memory_id": "not-a-uuid" }), None)
2818                .await
2819                .unwrap_err();
2820            assert!(format!("{err:?}").contains("invalid memory_id"));
2821        });
2822        h.shutdown(&runtime);
2823    }
2824
2825    #[test]
2826    fn forget_unknown_id_returns_invalid_params() {
2827        let runtime = rt();
2828        let h = Harness::new(&runtime);
2829        runtime.block_on(async {
2830            // Valid UUID format but not in episodes — handle_forget
2831            // surfaces NotFound, mapped to invalid_params per
2832            // solo_to_mcp.
2833            let err = h
2834                .server
2835                .dispatch_tool(
2836                    "memory_forget",
2837                    json!({ "memory_id": "00000000-0000-7000-8000-000000000000" }),
2838                    None,
2839                )
2840                .await
2841                .unwrap_err();
2842            assert!(format!("{err:?}").contains("not found"));
2843        });
2844        h.shutdown(&runtime);
2845    }
2846
2847    #[test]
2848    fn unknown_tool_name_returns_invalid_params() {
2849        let runtime = rt();
2850        let h = Harness::new(&runtime);
2851        runtime.block_on(async {
2852            let err = h
2853                .server
2854                .dispatch_tool("memory.summon", json!({}), None)
2855                .await
2856                .unwrap_err();
2857            assert!(format!("{err:?}").contains("unknown tool"));
2858        });
2859        h.shutdown(&runtime);
2860    }
2861
2862    /// Regression guard for v0.4.1's MCP tool name fix, generalised
2863    /// in v0.5.0 Priority 4 to cover **all three** major LLM
2864    /// providers, not just Anthropic.
2865    ///
2866    /// Each provider enforces its own tool-name regex on the
2867    /// function-calling wire. A tool name has to satisfy ALL of them
2868    /// to be portable across clients:
2869    ///
2870    ///   - **Anthropic**: `^[a-zA-Z0-9_-]{1,64}$` (what shipped in
2871    ///     v0.4.1; failing this rejects the entire toolset on Claude
2872    ///     Desktop / Cursor / Claude Code with
2873    ///     `FrontendRemoteMcpToolDefinition.name: String should
2874    ///     match pattern ...`).
2875    ///   - **OpenAI** function-calling: `^[a-zA-Z_][a-zA-Z0-9_-]*$`
2876    ///     with length ≤ 64 (must start with letter or underscore).
2877    ///   - **Gemini** function-calling: documented as a-z, A-Z, 0-9,
2878    ///     underscores and dashes; some sources also allow dots. We
2879    ///     use the conservative intersection — must start with
2880    ///     letter or underscore, alphanumeric + underscore only (no
2881    ///     hyphen, no dot), length ≤ 63. This is the strictest of
2882    ///     the three patterns, so any tool that passes it also
2883    ///     passes the other two. Sources differ on whether Gemini
2884    ///     accepts dots or hyphens; the strictest reading guards us
2885    ///     against the future where one provider tightens the regex
2886    ///     (which is the failure mode v0.4.1 hit on Anthropic). See
2887    ///     <https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/docs/api/google/generativeai/protos/FunctionDeclaration.md>
2888    ///     and <https://ai.google.dev/gemini-api/docs/function-calling>.
2889    ///
2890    /// Lesson banked v0.3 #8: rmcp framing tests pass dot-named
2891    /// tools fine because rmcp's own client-side validation is
2892    /// permissive. Only the downstream provider API enforces the
2893    /// regex. This test gates the names at `cargo test` time so any
2894    /// future tool-name change has to pass all three provider
2895    /// regexes before reaching real clients.
2896    #[test]
2897    fn tool_names_match_cross_provider_regex() {
2898        /// Anthropic API name regex: `^[a-zA-Z0-9_-]{1,64}$`.
2899        fn passes_anthropic(name: &str) -> bool {
2900            let len = name.len();
2901            if !(1..=64).contains(&len) {
2902                return false;
2903            }
2904            name.chars()
2905                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
2906        }
2907
2908        /// OpenAI function-calling name regex:
2909        /// `^[a-zA-Z_][a-zA-Z0-9_-]*$`, length ≤ 64.
2910        fn passes_openai(name: &str) -> bool {
2911            let len = name.len();
2912            if !(1..=64).contains(&len) {
2913                return false;
2914            }
2915            let mut chars = name.chars();
2916            let first = match chars.next() {
2917                Some(c) => c,
2918                None => return false,
2919            };
2920            if !(first.is_ascii_alphabetic() || first == '_') {
2921                return false;
2922            }
2923            chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
2924        }
2925
2926        /// Gemini function-calling name regex (conservative
2927        /// reading): `^[a-zA-Z_][a-zA-Z0-9_]*$`, length ≤ 63. No
2928        /// hyphen, no dot — strictest of the three so any name that
2929        /// passes this passes the other two.
2930        fn passes_gemini(name: &str) -> bool {
2931            let len = name.len();
2932            if !(1..=63).contains(&len) {
2933                return false;
2934            }
2935            let mut chars = name.chars();
2936            let first = match chars.next() {
2937                Some(c) => c,
2938                None => return false,
2939            };
2940            if !(first.is_ascii_alphabetic() || first == '_') {
2941                return false;
2942            }
2943            chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
2944        }
2945
2946        let tools = build_tools();
2947        assert_eq!(
2948            tools.len(),
2949            18,
2950            "expected 18 tools (context + update/entities/resolve + v0.5.x + document tools + remember_batch)"
2951        );
2952        // Sanity-check that tool_names() agrees with build_tools().
2953        let tool_name_strings: Vec<String> = tools.iter().map(|t| t.name.to_string()).collect();
2954        let public_names: Vec<String> = super::tool_names().iter().map(|s| s.to_string()).collect();
2955        assert_eq!(
2956            tool_name_strings, public_names,
2957            "tool_names() drifted from build_tools() — keep them in sync"
2958        );
2959
2960        for t in tools {
2961            assert!(
2962                passes_anthropic(&t.name),
2963                "tool name {:?} fails Anthropic regex \
2964                 ^[a-zA-Z0-9_-]{{1,64}}$ — see v0.3 lesson #8",
2965                t.name
2966            );
2967            assert!(
2968                passes_openai(&t.name),
2969                "tool name {:?} fails OpenAI function-calling regex \
2970                 ^[a-zA-Z_][a-zA-Z0-9_-]*$ (len ≤ 64)",
2971                t.name
2972            );
2973            assert!(
2974                passes_gemini(&t.name),
2975                "tool name {:?} fails Gemini function-calling regex \
2976                 ^[a-zA-Z_][a-zA-Z0-9_]*$ (len ≤ 63, strict)",
2977                t.name
2978            );
2979        }
2980    }
2981
2982    /// Regression guard for the v0.5.0 Priority 4 jargon pass.
2983    ///
2984    /// Tool descriptions and `get_info().instructions` are the first
2985    /// (and often only) thing a calling LLM reads when its
2986    /// tool-search mechanism decides whether Solo's tools are
2987    /// relevant. Earlier descriptions leaned on Solo-internal
2988    /// vocabulary (`SPO`, `Steward`, `LEFT JOIN`, `candidate pair`,
2989    /// `tagged_with`) which doesn't pattern-match natural-language
2990    /// agent queries like "what do you know about Alex?" — that's
2991    /// the load-bearing v0.5.0 finding from the 2026-05-14
2992    /// thesis-test in Claude Desktop.
2993    ///
2994    /// This test pins the de-jargoning by forbidding the old
2995    /// vocabulary from appearing in any user-facing text. Future
2996    /// contributors who reach for jargon trip the test and have to
2997    /// pick plain-English phrasing instead.
2998    #[test]
2999    fn tool_descriptions_avoid_internal_jargon() {
3000        // Case-insensitive substring match. Drawn from the
3001        // pre-Priority-4 descriptions; expand only if a new term
3002        // creeps in.
3003        const FORBIDDEN: &[&str] = &[
3004            "SPO",
3005            "Steward",
3006            "Steward-flagged",
3007            "LEFT JOIN",
3008            "candidate pair",
3009            "candidate_pair",
3010            "tagged_with",
3011        ];
3012
3013        fn contains_case_insensitive(haystack: &str, needle: &str) -> bool {
3014            haystack.to_lowercase().contains(&needle.to_lowercase())
3015        }
3016
3017        // 1. Each tool description.
3018        for t in build_tools() {
3019            let desc = t.description.as_deref().unwrap_or("");
3020            for term in FORBIDDEN {
3021                assert!(
3022                    !contains_case_insensitive(desc, term),
3023                    "tool {:?} description contains forbidden jargon \
3024                     {:?} — rewrite in plain English (see v0.5.0 \
3025                     Priority 4)",
3026                    t.name,
3027                    term,
3028                );
3029            }
3030        }
3031
3032        // 2. The server-level instructions (what tool-search sees
3033        // first).
3034        let server_info = harness_server_info();
3035        let instructions = server_info
3036            .instructions
3037            .as_deref()
3038            .expect("get_info() must set instructions");
3039        for term in FORBIDDEN {
3040            assert!(
3041                !contains_case_insensitive(instructions, term),
3042                "get_info().instructions contains forbidden jargon \
3043                 {:?} — rewrite in plain English",
3044                term,
3045            );
3046        }
3047    }
3048
3049    /// Build a `ServerInfo` for the jargon test without spinning up
3050    /// the full harness (which needs tokio + tempdir). The
3051    /// `ServerHandler::get_info()` method doesn't take `&self` state
3052    /// in any meaningful way for our impl — it returns a static
3053    /// `ServerInfo` literal — so we construct a minimal-input server
3054    /// just to call it.
3055    fn harness_server_info() -> rmcp::model::ServerInfo {
3056        let runtime = rt();
3057        let h = Harness::new(&runtime);
3058        let info = ServerHandler::get_info(&h.server);
3059        h.shutdown(&runtime);
3060        info
3061    }
3062
3063    /// Regression guard for the v0.9.0 → v0.9.1 P1 Fix 1 MCP
3064    /// `serverInfo` identity regression.
3065    ///
3066    /// In v0.9.0, P0a's rmcp 0.1.5 → 1.7 bump replaced the explicit
3067    /// `Implementation::new("solo", "<version>")` constructor with
3068    /// `Implementation::from_build_env()`. That helper reads
3069    /// `CARGO_PKG_NAME` + `CARGO_PKG_VERSION` from **rmcp's own** build
3070    /// environment (the proc-macro expansion captures rmcp's
3071    /// `Cargo.toml`, not the consumer's). Every Solo MCP daemon on
3072    /// v0.9.0 self-identified as `{name: "rmcp", version: "1.7.0"}`
3073    /// instead of `{name: "solo", version: "<workspace.version>"}`.
3074    ///
3075    /// Pins:
3076    ///   - `name == "solo"` (the operator-facing binary name, not
3077    ///     `"solo-api"` which would come from
3078    ///     `env!("CARGO_PKG_NAME")` against this crate's manifest);
3079    ///   - `version == env!("CARGO_PKG_VERSION")` from solo-api's own
3080    ///     compile environment (this is the workspace.package version
3081    ///     via inheritance, so it stays in sync with `solo --version`
3082    ///     and `solo-cli`'s identity).
3083    #[test]
3084    fn server_info_identity_is_solo_not_rmcp_or_solo_api() {
3085        let info = harness_server_info();
3086        let name = info.server_info.name.as_str();
3087        let version = info.server_info.version.as_str();
3088        assert_eq!(
3089            name, "solo",
3090            "MCP serverInfo.name must be \"solo\" (not \"rmcp\" or \
3091             \"solo-api\"). got name={name:?} version={version:?}"
3092        );
3093        assert_eq!(
3094            version,
3095            env!("CARGO_PKG_VERSION"),
3096            "MCP serverInfo.version must match solo-api's compile-time \
3097             CARGO_PKG_VERSION (i.e. the workspace.package version); \
3098             a mismatch means we regressed back to rmcp's build env. \
3099             got version={version:?}"
3100        );
3101    }
3102
3103    // ---- memory_inspect_cluster (v0.5.0 Priority 3) ----
3104
3105    #[test]
3106    fn inspect_cluster_unknown_id_returns_invalid_params() {
3107        // NotFound from solo_query::inspect_cluster is mapped through
3108        // `solo_to_mcp` to `invalid_params` (MCP has no separate
3109        // not-found error shape). Error message should name the id.
3110        let runtime = rt();
3111        let h = Harness::new(&runtime);
3112        runtime.block_on(async {
3113            let err = h
3114                .server
3115                .dispatch_tool(
3116                    "memory_inspect_cluster",
3117                    json!({ "cluster_id": "no-such-cluster" }),
3118                    None,
3119                )
3120                .await
3121                .expect_err("unknown cluster must error");
3122            let s = format!("{err:?}");
3123            assert!(
3124                s.contains("no-such-cluster") || s.to_lowercase().contains("not found"),
3125                "expected error to mention the missing cluster id; got: {s}"
3126            );
3127        });
3128        h.shutdown(&runtime);
3129    }
3130
3131    #[test]
3132    fn inspect_cluster_rejects_empty_id() {
3133        let runtime = rt();
3134        let h = Harness::new(&runtime);
3135        runtime.block_on(async {
3136            let err = h
3137                .server
3138                .dispatch_tool(
3139                    "memory_inspect_cluster",
3140                    json!({ "cluster_id": "   " }),
3141                    None,
3142                )
3143                .await
3144                .expect_err("blank cluster_id must error");
3145            let s = format!("{err:?}");
3146            assert!(
3147                s.to_lowercase().contains("cluster_id")
3148                    || s.to_lowercase().contains("must not be empty"),
3149                "got: {s}"
3150            );
3151        });
3152        h.shutdown(&runtime);
3153    }
3154
3155    // ---- Document tools (v0.7.0 P5) ----
3156    //
3157    // The five document handlers each have two arg-shape tests:
3158    //   - arg-struct parses from JSON (serde round-trip; defaults work).
3159    //   - dispatch arm routes to the handler (we observe behaviour via
3160    //     a known empty-DB response — bad routing surfaces as
3161    //     "unknown tool" or wrong shape).
3162    //
3163    // Functional coverage (ingest → search → inspect → forget) lives in
3164    // `crates/solo-cli/tests/mcp_smoke.rs` where a real subprocess + real
3165    // writer-with-embedder is wired up. The in-process Harness here uses
3166    // `WriterActor::spawn` which doesn't carry an embedder, so ingest /
3167    // search themselves return an error — but the dispatch + arg-parse
3168    // paths exercise correctly.
3169
3170    #[test]
3171    fn ingest_document_args_parse_with_required_path() {
3172        let v: IngestDocumentArgs =
3173            serde_json::from_value(json!({ "path": "/tmp/notes.md" })).expect("parses");
3174        assert_eq!(v.path, "/tmp/notes.md");
3175        // path is required — missing must reject at deserialization.
3176        let err = serde_json::from_value::<IngestDocumentArgs>(json!({})).unwrap_err();
3177        assert!(format!("{err}").contains("path"));
3178    }
3179
3180    #[test]
3181    fn search_docs_args_parse_with_default_limit() {
3182        let v: SearchDocsArgs =
3183            serde_json::from_value(json!({ "query": "backups" })).expect("parses");
3184        assert_eq!(v.query, "backups");
3185        assert_eq!(v.limit, 5, "default limit must be 5");
3186        let v: SearchDocsArgs =
3187            serde_json::from_value(json!({ "query": "backups", "limit": 20 })).expect("parses");
3188        assert_eq!(v.limit, 20);
3189    }
3190
3191    #[test]
3192    fn inspect_document_args_parse_with_required_doc_id() {
3193        let v: InspectDocumentArgs =
3194            serde_json::from_value(json!({ "doc_id": "abc" })).expect("parses");
3195        assert_eq!(v.doc_id, "abc");
3196        let err = serde_json::from_value::<InspectDocumentArgs>(json!({})).unwrap_err();
3197        assert!(format!("{err}").contains("doc_id"));
3198    }
3199
3200    #[test]
3201    fn list_documents_args_parse_with_all_defaults() {
3202        let v: ListDocumentsArgs = serde_json::from_value(json!({})).expect("parses");
3203        assert_eq!(v.limit, 20, "default limit must be 20");
3204        assert_eq!(v.offset, 0, "default offset must be 0");
3205        assert!(
3206            !v.include_forgotten,
3207            "default include_forgotten must be false"
3208        );
3209        let v: ListDocumentsArgs =
3210            serde_json::from_value(json!({ "limit": 5, "offset": 10, "include_forgotten": true }))
3211                .expect("parses");
3212        assert_eq!(v.limit, 5);
3213        assert_eq!(v.offset, 10);
3214        assert!(v.include_forgotten);
3215    }
3216
3217    #[test]
3218    fn forget_document_args_parse_with_required_doc_id() {
3219        let v: ForgetDocumentArgs =
3220            serde_json::from_value(json!({ "doc_id": "abc" })).expect("parses");
3221        assert_eq!(v.doc_id, "abc");
3222        let err = serde_json::from_value::<ForgetDocumentArgs>(json!({})).unwrap_err();
3223        assert!(format!("{err}").contains("doc_id"));
3224    }
3225
3226    #[test]
3227    fn ingest_document_rejects_empty_path() {
3228        // Reaches the dispatch arm → handle_ingest_document → empty
3229        // guard fires before the writer is touched. Proves routing.
3230        let runtime = rt();
3231        let h = Harness::new(&runtime);
3232        runtime.block_on(async {
3233            let err = h
3234                .server
3235                .dispatch_tool("memory_ingest_document", json!({ "path": "" }), None)
3236                .await
3237                .expect_err("empty path must error");
3238            let s = format!("{err:?}");
3239            assert!(
3240                s.to_lowercase().contains("path") || s.to_lowercase().contains("must not be empty"),
3241                "got: {s}"
3242            );
3243        });
3244        h.shutdown(&runtime);
3245    }
3246
3247    #[test]
3248    fn search_docs_rejects_empty_query() {
3249        // Empty query trips solo_query::run_doc_search's validation
3250        // → InvalidInput → invalid_params.
3251        let runtime = rt();
3252        let h = Harness::new(&runtime);
3253        runtime.block_on(async {
3254            let err = h
3255                .server
3256                .dispatch_tool("memory_search_docs", json!({ "query": "   " }), None)
3257                .await
3258                .expect_err("empty query must error");
3259            let s = format!("{err:?}");
3260            assert!(
3261                s.to_lowercase().contains("must not be empty")
3262                    || s.to_lowercase().contains("invalid"),
3263                "got: {s}"
3264            );
3265        });
3266        h.shutdown(&runtime);
3267    }
3268
3269    #[test]
3270    fn inspect_document_unknown_id_returns_invalid_params() {
3271        // Valid UUID format but no row exists → handler returns
3272        // invalid_params with the missing id in the message.
3273        let runtime = rt();
3274        let h = Harness::new(&runtime);
3275        runtime.block_on(async {
3276            let err = h
3277                .server
3278                .dispatch_tool(
3279                    "memory_inspect_document",
3280                    json!({ "doc_id": "00000000-0000-7000-8000-000000000000" }),
3281                    None,
3282                )
3283                .await
3284                .expect_err("unknown doc must error");
3285            let s = format!("{err:?}");
3286            assert!(
3287                s.to_lowercase().contains("not found"),
3288                "expected 'not found' message; got: {s}"
3289            );
3290        });
3291        h.shutdown(&runtime);
3292    }
3293
3294    #[test]
3295    fn inspect_document_rejects_malformed_id() {
3296        let runtime = rt();
3297        let h = Harness::new(&runtime);
3298        runtime.block_on(async {
3299            let err = h
3300                .server
3301                .dispatch_tool(
3302                    "memory_inspect_document",
3303                    json!({ "doc_id": "not-a-uuid" }),
3304                    None,
3305                )
3306                .await
3307                .expect_err("malformed doc_id must error");
3308            let s = format!("{err:?}");
3309            assert!(s.contains("invalid doc_id"), "got: {s}");
3310        });
3311        h.shutdown(&runtime);
3312    }
3313
3314    #[test]
3315    fn list_documents_returns_empty_array_on_empty_db() {
3316        let runtime = rt();
3317        let h = Harness::new(&runtime);
3318        runtime.block_on(async {
3319            let r = h
3320                .server
3321                .dispatch_tool("memory_list_documents", json!({}), None)
3322                .await
3323                .expect("list succeeds");
3324            let text = first_text(&r);
3325            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
3326            assert!(v.is_array(), "expected array, got: {text}");
3327            assert_eq!(v.as_array().unwrap().len(), 0);
3328        });
3329        h.shutdown(&runtime);
3330    }
3331
3332    #[test]
3333    fn list_documents_passes_through_limit_offset_include_args() {
3334        let runtime = rt();
3335        let h = Harness::new(&runtime);
3336        runtime.block_on(async {
3337            let r = h
3338                .server
3339                .dispatch_tool(
3340                    "memory_list_documents",
3341                    json!({ "limit": 5, "offset": 10, "include_forgotten": true }),
3342                    None,
3343                )
3344                .await
3345                .expect("list with args succeeds");
3346            let text = first_text(&r);
3347            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
3348            assert!(v.is_array());
3349        });
3350        h.shutdown(&runtime);
3351    }
3352
3353    #[test]
3354    fn forget_document_rejects_malformed_id() {
3355        let runtime = rt();
3356        let h = Harness::new(&runtime);
3357        runtime.block_on(async {
3358            let err = h
3359                .server
3360                .dispatch_tool(
3361                    "memory_forget_document",
3362                    json!({ "doc_id": "not-a-uuid" }),
3363                    None,
3364                )
3365                .await
3366                .expect_err("malformed doc_id must error");
3367            let s = format!("{err:?}");
3368            assert!(s.contains("invalid doc_id"), "got: {s}");
3369        });
3370        h.shutdown(&runtime);
3371    }
3372
3373    // -----------------------------------------------------------------
3374    // v0.9.2 — `memory_remember_batch` + `salience` MCP layer tests.
3375    // -----------------------------------------------------------------
3376
3377    /// salience round-trip through `memory_remember`: an explicit
3378    /// in-range value reaches the writer; an absent value defaults
3379    /// to 0.5; an out-of-range value is rejected with invalid_params.
3380    #[test]
3381    fn remember_with_explicit_salience_round_trips() {
3382        let runtime = rt();
3383        let h = Harness::new(&runtime);
3384        runtime.block_on(async {
3385            let r = h
3386                .server
3387                .dispatch_tool(
3388                    "memory_remember",
3389                    json!({ "content": "with salience", "salience": 0.83 }),
3390                    None,
3391                )
3392                .await
3393                .expect("remember w/ salience succeeds");
3394            let text = first_text(&r);
3395            // Confirmation includes the new MemoryId.
3396            assert!(text.starts_with("remembered "), "got: {text}");
3397        });
3398        h.shutdown(&runtime);
3399    }
3400
3401    #[test]
3402    fn remember_with_out_of_range_salience_returns_invalid_params() {
3403        let runtime = rt();
3404        let h = Harness::new(&runtime);
3405        runtime.block_on(async {
3406            let err = h
3407                .server
3408                .dispatch_tool(
3409                    "memory_remember",
3410                    json!({ "content": "out of range", "salience": 1.5 }),
3411                    None,
3412                )
3413                .await
3414                .unwrap_err();
3415            let s = format!("{err:?}");
3416            assert!(s.contains("salience must be"), "got: {s}");
3417        });
3418        h.shutdown(&runtime);
3419    }
3420
3421    /// Salience boundary: 0.0 and 1.0 are both valid (inclusive range).
3422    #[test]
3423    fn remember_with_boundary_salience_succeeds() {
3424        let runtime = rt();
3425        let h = Harness::new(&runtime);
3426        runtime.block_on(async {
3427            for s in [0.0_f64, 1.0_f64] {
3428                let r = h
3429                    .server
3430                    .dispatch_tool(
3431                        "memory_remember",
3432                        json!({ "content": format!("boundary-{s}"), "salience": s }),
3433                        None,
3434                    )
3435                    .await
3436                    .expect("boundary salience succeeds");
3437                assert!(first_text(&r).starts_with("remembered "));
3438            }
3439        });
3440        h.shutdown(&runtime);
3441    }
3442
3443    /// Happy-path batch: 3 items go in, 3 memory_ids come out in order.
3444    #[test]
3445    fn remember_batch_returns_ids_in_order() {
3446        let runtime = rt();
3447        let h = Harness::new(&runtime);
3448        runtime.block_on(async {
3449            let items = json!([
3450                { "content": "batch-a" },
3451                { "content": "batch-b", "source_type": "user_preference", "salience": 0.9 },
3452                { "content": "batch-c", "salience": 0.1 },
3453            ]);
3454            let r = h
3455                .server
3456                .dispatch_tool("memory_remember_batch", json!({ "items": items }), None)
3457                .await
3458                .expect("batch succeeds");
3459            let text = first_text(&r);
3460            let parsed: serde_json::Value = serde_json::from_str(&text).expect("reply is JSON");
3461            let arr = parsed.as_array().expect("reply is array");
3462            assert_eq!(arr.len(), 3, "3 items in → 3 ids out: {text}");
3463            // Each entry must be a UUID-shaped string.
3464            for v in arr {
3465                let s = v.as_str().unwrap_or_else(|| panic!("non-string id: {v}"));
3466                assert_eq!(s.len(), 36, "UUID-shaped id expected: {s}");
3467            }
3468            // Distinct ids.
3469            let mut ids: Vec<&str> = arr.iter().map(|v| v.as_str().unwrap()).collect();
3470            ids.sort();
3471            ids.dedup();
3472            assert_eq!(ids.len(), 3, "ids must be distinct: {text}");
3473        });
3474        h.shutdown(&runtime);
3475    }
3476
3477    /// Empty items → invalid_params before any embedding work.
3478    #[test]
3479    fn remember_batch_empty_items_returns_invalid_params() {
3480        let runtime = rt();
3481        let h = Harness::new(&runtime);
3482        runtime.block_on(async {
3483            let err = h
3484                .server
3485                .dispatch_tool("memory_remember_batch", json!({ "items": [] }), None)
3486                .await
3487                .unwrap_err();
3488            let s = format!("{err:?}");
3489            assert!(s.contains("must not be empty"), "got: {s}");
3490        });
3491        h.shutdown(&runtime);
3492    }
3493
3494    /// Per-item validation: empty content trips invalid_params with the
3495    /// index of the offending item baked into the message.
3496    #[test]
3497    fn remember_batch_rejects_per_item_empty_content() {
3498        let runtime = rt();
3499        let h = Harness::new(&runtime);
3500        runtime.block_on(async {
3501            let items = json!([
3502                { "content": "ok-1" },
3503                { "content": "   " },
3504                { "content": "ok-3" },
3505            ]);
3506            let err = h
3507                .server
3508                .dispatch_tool("memory_remember_batch", json!({ "items": items }), None)
3509                .await
3510                .unwrap_err();
3511            let s = format!("{err:?}");
3512            assert!(s.contains("items[1]"), "must mention items[1]: {s}");
3513            assert!(s.contains("must not be empty"), "got: {s}");
3514        });
3515        h.shutdown(&runtime);
3516    }
3517
3518    /// Per-item validation: out-of-range salience trips invalid_params
3519    /// with the item index in the message.
3520    #[test]
3521    fn remember_batch_rejects_per_item_salience_out_of_range() {
3522        let runtime = rt();
3523        let h = Harness::new(&runtime);
3524        runtime.block_on(async {
3525            let items = json!([
3526                { "content": "ok-1", "salience": 0.5 },
3527                { "content": "out-of-range", "salience": -0.1 },
3528            ]);
3529            let err = h
3530                .server
3531                .dispatch_tool("memory_remember_batch", json!({ "items": items }), None)
3532                .await
3533                .unwrap_err();
3534            let s = format!("{err:?}");
3535            assert!(s.contains("items[1]"), "must mention items[1]: {s}");
3536            assert!(s.contains("salience must be"), "got: {s}");
3537        });
3538        h.shutdown(&runtime);
3539    }
3540
3541    /// Over-cap batch is rejected at the MCP layer so we never embed
3542    /// 201+ items. Pinned at the same constant as the writer-actor.
3543    #[test]
3544    fn remember_batch_over_cap_returns_invalid_params() {
3545        let runtime = rt();
3546        let h = Harness::new(&runtime);
3547        runtime.block_on(async {
3548            let items: Vec<serde_json::Value> = (0..(solo_storage::MAX_REMEMBER_BATCH_SIZE + 1))
3549                .map(|i| json!({ "content": format!("over-{i}") }))
3550                .collect();
3551            let err = h
3552                .server
3553                .dispatch_tool("memory_remember_batch", json!({ "items": items }), None)
3554                .await
3555                .unwrap_err();
3556            let s = format!("{err:?}");
3557            assert!(
3558                s.contains("MAX_REMEMBER_BATCH_SIZE"),
3559                "must mention the cap: {s}"
3560            );
3561        });
3562        h.shutdown(&runtime);
3563    }
3564
3565    // -----------------------------------------------------------------
3566    // v0.11.0 P3: per-tool progress event tests.
3567    //
3568    // These tests invoke `dispatch_tool` with a real
3569    // `ProgressReporter` wired to a fresh `SessionState`, then drain
3570    // the session's broadcast receiver to observe the emitted events.
3571    // The pattern mirrors `mcp_progress::tests::progress_reporter_*`
3572    // but exercises the full handler call stack (including the writer
3573    // and query pipelines) end-to-end.
3574    // -----------------------------------------------------------------
3575
3576    use crate::mcp_progress::{ProgressReporter, ProgressToken};
3577    use crate::mcp_session::SessionState;
3578    use std::sync::Arc as StdArc2;
3579
3580    fn fresh_progress_session() -> StdArc2<SessionState> {
3581        StdArc2::new(SessionState::new(
3582            solo_core::TenantId::default_tenant(),
3583            None,
3584        ))
3585    }
3586
3587    fn drain_progress_events(
3588        rx: &mut tokio::sync::broadcast::Receiver<crate::mcp_session::McpStreamEvent>,
3589    ) -> Vec<crate::mcp_session::McpStreamEvent> {
3590        let mut out = Vec::new();
3591        while let Ok(ev) = rx.try_recv() {
3592            out.push(ev);
3593        }
3594        out
3595    }
3596
3597    // v0.11.0 P3 note: `ingest_document_emits_progress_at_*` test lives
3598    // in `http::handler_tests` because the dispatch_tests harness uses
3599    // `WriterActor::spawn` (no embedder), so an end-to-end ingest panics
3600    // with "writer has no embedder". The handler_tests harness uses
3601    // `WriterActor::spawn_full` which carries an embedder; we exercise
3602    // the ingest progress checkpoints there.
3603
/// v0.11.0 P3: a `memory_search_docs` call whose `limit` (150) is above
/// the progress threshold (100) publishes exactly 3 progress events to
/// the session behind the reporter.
#[test]
fn search_docs_emits_progress_only_when_top_k_above_100() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        let session = fresh_progress_session();
        // Subscribe before dispatching so no event can be missed.
        let mut events_rx = session.subscribe_events();
        let reporter = ProgressReporter::new(session.clone(), ProgressToken(json!(42)));
        let args = json!({ "query": "anything", "limit": 150 });
        let _response = h
            .server
            .dispatch_tool("memory_search_docs", args, Some(reporter))
            .await
            .expect("search succeeds");

        let events = drain_progress_events(&mut events_rx);
        let received = events.len();
        assert_eq!(
            received, 3,
            "expected 3 search progress events at top_k=150, got {}",
            received
        );

        // Spec shape: each event echoes the numeric progressToken (42),
        // reports a total of 3, and the progress counter walks 1..=3.
        for (ev, step) in events.iter().zip(1u64..) {
            let params = &ev.data["params"];
            assert_eq!(params["progressToken"], json!(42));
            assert_eq!(params["total"], json!(3));
            assert_eq!(params["progress"], json!(step));
        }
    });
    h.shutdown(&runtime);
}
3641
/// v0.11.0 P3: with `limit` 50 (at or below the threshold)
/// `memory_search_docs` stays silent even though a reporter is wired.
/// Threshold gating per Decision C.
#[test]
fn search_docs_emits_no_progress_when_top_k_below_threshold() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        let session = fresh_progress_session();
        let mut events_rx = session.subscribe_events();
        let reporter = ProgressReporter::new(session.clone(), ProgressToken(json!("t")));
        let args = json!({ "query": "anything", "limit": 50 });
        let _response = h
            .server
            .dispatch_tool("memory_search_docs", args, Some(reporter))
            .await
            .expect("search succeeds");

        // Nothing should have been published to the session.
        let events = drain_progress_events(&mut events_rx);
        assert!(
            events.is_empty(),
            "expected no progress events at top_k=50, got {events:?}"
        );
    });
    h.shutdown(&runtime);
}
3670
/// v0.11.0 P3: a `memory_remember_batch` call with more than 50 items
/// reports embed progress every 25 items, followed by a final
/// "embedded" event and an "inserted" event. For a 51-item batch that
/// is: 25, 50, embedded (51/51), inserted (51/51) — 4 events in total.
#[test]
fn remember_batch_emits_progress_only_when_size_above_50() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        let session = fresh_progress_session();
        let mut events_rx = session.subscribe_events();
        let reporter = ProgressReporter::new(session.clone(), ProgressToken(json!("batch")));

        // 51 items: one past the threshold.
        let mut items: Vec<serde_json::Value> = Vec::with_capacity(51);
        for i in 0..51 {
            items.push(json!({ "content": format!("item-{i}") }));
        }
        let args = json!({ "items": items });
        let _response = h
            .server
            .dispatch_tool("memory_remember_batch", args, Some(reporter))
            .await
            .expect("batch succeeds");

        let events = drain_progress_events(&mut events_rx);
        assert_eq!(
            events.len(),
            4,
            "expected 4 batch progress events for 51 items, got {}: {events:?}",
            events.len()
        );

        // Expected sequence: 25/51 "embedding", 50/51 "embedding",
        // 51/51 "embedded", 51/51 "inserted". A missing or non-numeric
        // progress field is read as 0 and therefore fails the comparison.
        let mut progresses: Vec<u64> = Vec::with_capacity(events.len());
        for ev in &events {
            let progress = ev.data["params"]["progress"].as_u64().unwrap_or(0);
            progresses.push(progress);
        }
        assert_eq!(progresses, vec![25, 50, 51, 51]);

        let final_params = &events.last().unwrap().data["params"];
        assert_eq!(final_params["message"], json!("inserted"));

        // Every event carries the caller's token and the batch size as total.
        for ev in &events {
            let params = &ev.data["params"];
            assert_eq!(params["progressToken"], json!("batch"));
            assert_eq!(params["total"], json!(51));
        }
    });
    h.shutdown(&runtime);
}
3720
/// v0.11.0 P3: batches of 50 items or fewer publish no progress events,
/// even when a reporter is wired. Wire-overhead gating per Decision C.
#[test]
fn remember_batch_emits_no_progress_when_size_below_threshold() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        let session = fresh_progress_session();
        let mut events_rx = session.subscribe_events();
        let reporter = ProgressReporter::new(session.clone(), ProgressToken(json!("t")));

        // Only 5 items — far under the threshold.
        let mut items: Vec<serde_json::Value> = Vec::with_capacity(5);
        for i in 0..5 {
            items.push(json!({ "content": format!("small-{i}") }));
        }
        let args = json!({ "items": items });
        let _response = h
            .server
            .dispatch_tool("memory_remember_batch", args, Some(reporter))
            .await
            .expect("batch succeeds");

        let events = drain_progress_events(&mut events_rx);
        assert!(
            events.is_empty(),
            "expected no progress events for 5-item batch, got {events:?}"
        );
    });
    h.shutdown(&runtime);
}
3752
/// v0.11.0 P3: a stdio-style call (no session, hence no progress
/// reporter) must neither panic nor publish any event. This pins the
/// backward-compat invariant the rmcp `call_tool` path relies on.
///
/// `memory_search_docs` is used because it has no embedder dependency
/// in the dispatch_tests harness; the matching `ingest_document`
/// "no progress" guarantee is asserted in `http::handler_tests` through
/// the same `None` path.
#[test]
fn stdio_transport_does_not_emit_progress_events() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        // The session exists only to provide a receiver. The tool call is
        // given `None`, so this session must stay completely quiet.
        let session = fresh_progress_session();
        let mut events_rx = session.subscribe_events();

        // `limit` is above the threshold, so progress WOULD fire if a
        // reporter were wired — with no reporter there must be no events.
        let args = json!({ "query": "anything", "limit": 200 });
        let _response = h
            .server
            .dispatch_tool("memory_search_docs", args, None) // stdio: no reporter
            .await
            .expect("search succeeds without reporter");

        let events = drain_progress_events(&mut events_rx);
        assert!(
            events.is_empty(),
            "stdio path (no reporter) must not publish to ANY session: {events:?}"
        );
    });
    h.shutdown(&runtime);
}
3788
/// v0.11.0 P3: event ids published to one session are strictly
/// increasing across multiple tool calls. Pinned to surface any
/// regression in `SessionState::publish_event`'s id allocator.
///
/// Both dispatches are required to succeed, so a failing tool call is
/// reported as such instead of showing up later as a confusing
/// event-count mismatch.
#[test]
fn progress_event_id_monotonic_per_session() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        let session = fresh_progress_session();
        let mut rx = session.subscribe_events();
        // Two calls run back to back on the SAME session, each with its
        // own progress token; ids must keep increasing from the first
        // call's events into the second call's events.
        let r1 = ProgressReporter::new(session.clone(), ProgressToken(json!("a")));
        let r2 = ProgressReporter::new(session.clone(), ProgressToken(json!("b")));
        let _first = h
            .server
            .dispatch_tool(
                "memory_search_docs",
                json!({ "query": "q1", "limit": 150 }),
                Some(r1),
            )
            .await
            .expect("first search succeeds");
        let _second = h
            .server
            .dispatch_tool(
                "memory_search_docs",
                json!({ "query": "q2", "limit": 150 }),
                Some(r2),
            )
            .await
            .expect("second search succeeds");
        let events = drain_progress_events(&mut rx);
        assert!(events.len() >= 6, "expected at least 6 events: {events:?}");
        let ids: Vec<u64> = events.iter().map(|e| e.id).collect();
        // Compare each id with its successor: equal or decreasing ids fail.
        for w in ids.windows(2) {
            assert!(w[0] < w[1], "event ids must be strictly monotonic: {ids:?}");
        }
    });
    h.shutdown(&runtime);
}
3828}
3829
3830// ===========================================================================
3831// v0.8.1 P2: MCP audit principal extraction
3832// ===========================================================================
3833//
3834// These tests live in their own module because they manipulate the
3835// `SOLO_MCP_PRINCIPAL_TOKEN` env var, which is process-global mutable
3836// state. Serialised via a static `Mutex` so cargo test's multi-threaded
3837// runner doesn't race. Pattern mirrors the env-guard discipline in
3838// `solo_cli::commands::common::ollama_overrides_tests`.
3839
#[cfg(test)]
mod principal_extraction_tests {
    use super::*;
    use std::sync::Mutex;

    /// Process-wide lock that serialises every test touching
    /// `SOLO_MCP_PRINCIPAL_TOKEN`.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Acquires [`ENV_LOCK`]. A poisoned lock is recovered via
    /// `into_inner`, so one panicking test doesn't sink the rest of the
    /// suite.
    fn hold_env_lock() -> std::sync::MutexGuard<'static, ()> {
        match ENV_LOCK.lock() {
            Ok(guard) => guard,
            Err(poisoned) => poisoned.into_inner(),
        }
    }

    /// RAII guard: removes the env var when dropped, so a panicking test
    /// cannot leak its setting into the next case.
    struct EnvGuard;

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            // SAFETY: every caller holds ENV_LOCK across construct + drop.
            unsafe { std::env::remove_var(ENV_MCP_PRINCIPAL_TOKEN) };
        }
    }

    /// Sets the principal env var to `val` and returns the guard that
    /// unsets it again on drop.
    fn set_principal_env(val: &str) -> EnvGuard {
        // SAFETY: ENV_LOCK held by caller.
        unsafe { std::env::set_var(ENV_MCP_PRINCIPAL_TOKEN, val) };
        EnvGuard
    }

    /// Removes the principal env var up front and returns the guard that
    /// removes it once more on drop.
    fn clear_principal_env() -> EnvGuard {
        // SAFETY: ENV_LOCK held by caller.
        unsafe { std::env::remove_var(ENV_MCP_PRINCIPAL_TOKEN) };
        EnvGuard
    }

    /// Stdio path: with `SOLO_MCP_PRINCIPAL_TOKEN` set, resolution
    /// yields that value as the principal.
    #[test]
    fn stdio_env_var_resolves_to_principal() {
        let _lock = hold_env_lock();
        let _g = set_principal_env("alice-token");
        assert_eq!(resolve_mcp_principal(None).as_deref(), Some("alice-token"));
    }

    /// Stdio path: no env var ⇒ `None` (regression — must preserve
    /// v0.8.0 behaviour for users without auth).
    #[test]
    fn stdio_no_env_var_resolves_to_none() {
        let _lock = hold_env_lock();
        let _g = clear_principal_env();
        let resolved = resolve_mcp_principal(None);
        assert_eq!(resolved, None);
    }

    /// Stdio path: an env var holding only whitespace ⇒ `None`, so a
    /// launcher typo does not pin every audit row to a blank principal.
    #[test]
    fn stdio_whitespace_env_var_resolves_to_none() {
        let _lock = hold_env_lock();
        let _g = set_principal_env("   \t  ");
        let resolved = resolve_mcp_principal(None);
        assert_eq!(resolved, None);
    }

    /// HTTP-MCP path: an `Authorization: Bearer <token>` header value
    /// resolves to the token as principal.
    #[test]
    fn http_header_resolves_to_bearer_token_principal() {
        let _lock = hold_env_lock();
        let _g = clear_principal_env();
        let header = Some("Bearer api-token-xyz");
        assert_eq!(resolve_mcp_principal(header).as_deref(), Some("api-token-xyz"));
    }

    /// Precedence: when the env var AND the header both carry a token,
    /// the header wins (consistent with the rest of the auth stack — JWT
    /// claim beats `X-Solo-Tenant` header).
    #[test]
    fn http_header_beats_env_var() {
        let _lock = hold_env_lock();
        let _g = set_principal_env("env-token");
        let header = Some("Bearer header-token");
        let resolved = resolve_mcp_principal(header);
        assert_eq!(
            resolved.as_deref(),
            Some("header-token"),
            "header MUST win over env var per documented precedence"
        );
    }

    /// HTTP-MCP path: a header without the `Bearer ` prefix is treated
    /// as malformed and resolution falls through to the env var.
    #[test]
    fn http_malformed_header_falls_through_to_env() {
        let _lock = hold_env_lock();
        let _g = set_principal_env("env-fallback");
        let header = Some("Basic dXNlcjpwYXNz");
        assert_eq!(resolve_mcp_principal(header).as_deref(), Some("env-fallback"));
    }

    /// HTTP-MCP path: `Bearer ` followed by nothing but spaces also
    /// falls through to the env var. Same spirit as the whitespace
    /// env-var rejection — don't credit a half-formed header.
    #[test]
    fn http_empty_bearer_header_falls_through_to_env() {
        let _lock = hold_env_lock();
        let _g = set_principal_env("env-fallback");
        let header = Some("Bearer   ");
        assert_eq!(resolve_mcp_principal(header).as_deref(), Some("env-fallback"));
    }

    /// Repeated calls to `resolve_mcp_principal` under one env-var
    /// setting keep returning the same principal (regression guard: an
    /// accidental thread-local cache would break the "stable across N
    /// tool calls in one session" contract the brief calls out).
    #[test]
    fn stable_across_multiple_resolutions() {
        let _lock = hold_env_lock();
        let _g = set_principal_env("stable-token");
        for _round in 0..5 {
            let resolved = resolve_mcp_principal(None);
            assert_eq!(resolved.as_deref(), Some("stable-token"));
        }
    }
}
3962
/// v0.9.0 P2: unit tests for the LLM-config gate applied at MCP
/// initialize time.
///
/// These exercise [`initialize_decision`] as a pure function: no rmcp
/// Peer is built (its constructors are private) and no MCP handshake is
/// driven. The glue between `initialize_decision` and the side-effect
/// path lives in [`SoloMcpServer::initialize`]; it is covered indirectly
/// by the audit-row tests in [`crate::llm::sampling::tests`], which run
/// the same `SamplingLlmClient` + `WriteCommand::EmitLlmSamplingAudit`
/// path that `populate_sampling_steward` constructs.
#[cfg(test)]
mod initialize_decision_tests {
    use super::*;
    use solo_storage::LlmSettings;

    /// No `[llm]` block → Allow for both capability values (matches
    /// v0.8.x behaviour).
    #[test]
    fn no_llm_block_allows_initialize_regardless_of_sampling_capability() {
        for has_sampling in [false, true] {
            assert_eq!(
                initialize_decision(&None, has_sampling),
                InitializeDecision::Allow
            );
        }
    }

    /// `[llm] mode = "none"` → Allow for both capability values.
    #[test]
    fn llm_none_allows_initialize_regardless_of_sampling_capability() {
        let settings = Some(LlmSettings::None);
        for has_sampling in [false, true] {
            assert_eq!(
                initialize_decision(&settings, has_sampling),
                InitializeDecision::Allow
            );
        }
    }

    /// `[llm] mode = "anthropic"` → Allow for both capability values.
    #[test]
    fn llm_anthropic_allows_initialize_regardless_of_sampling_capability() {
        let settings = Some(LlmSettings::Anthropic {
            api_key_env: "ANTHROPIC_API_KEY".into(),
            model: "claude-sonnet-4-6".into(),
        });
        for has_sampling in [false, true] {
            assert_eq!(
                initialize_decision(&settings, has_sampling),
                InitializeDecision::Allow
            );
        }
    }

    /// `[llm] mode = "ollama"` → Allow for both capability values.
    #[test]
    fn llm_ollama_allows_initialize_regardless_of_sampling_capability() {
        let settings = Some(LlmSettings::Ollama {
            base_url: "http://localhost:11434".into(),
            model: "qwen3-coder:30b".into(),
        });
        for has_sampling in [false, true] {
            assert_eq!(
                initialize_decision(&settings, has_sampling),
                InitializeDecision::Allow
            );
        }
    }

    /// `[llm] mode = "mcp_sampling"` and a peer WITH the sampling
    /// capability → the sampling-steward slot gets populated.
    #[test]
    fn llm_mcp_sampling_with_sampling_capability_populates_slot() {
        let settings = Some(LlmSettings::McpSampling);
        let decision = initialize_decision(&settings, true);
        assert_eq!(decision, InitializeDecision::PopulateSamplingSteward);
    }

    /// `[llm] mode = "mcp_sampling"` and a peer WITHOUT the sampling
    /// capability → initialize is rejected with the locked BLOCKER 2
    /// error.
    #[test]
    fn llm_mcp_sampling_without_sampling_capability_rejects() {
        let settings = Some(LlmSettings::McpSampling);
        let decision = initialize_decision(&settings, false);
        assert_eq!(
            decision,
            InitializeDecision::RejectMissingSamplingCapability
        );
    }

    /// The locked BLOCKER 2 error message body is byte-stable: a future
    /// audit-revision can grep for these strings and confirm they still
    /// land.
    #[test]
    fn sampling_capability_missing_error_message_contains_all_alternatives() {
        let text = sampling_capability_missing_error_message();

        // Banner.
        assert!(text.contains("LLM backend `mcp_sampling`"));

        // Four alternative config blocks.
        assert!(text.contains("mode = \"anthropic\""));
        assert!(text.contains("api_key_env = \"ANTHROPIC_API_KEY\""));

        assert!(text.contains("mode = \"openai\""));
        assert!(text.contains("api_key_env = \"OPENAI_API_KEY\""));

        assert!(text.contains("mode = \"ollama\""));
        assert!(text.contains("base_url = \"http://localhost:11434\""));

        assert!(text.contains("mode = \"none\""));

        // Footer pointing at the release-prep doc.
        assert!(text.contains("docs/releases/v0.9.0.md"));
    }
}
4056
4057// fetch_recall_rows + RecallHit + RecallRow used to live here. Recall
4058// pipeline moved to solo_query::recall in commit (consolidate-recall);
4059// transports just call solo_query::run_recall and format the result.