Skip to main content

solo_api/
mcp.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! MCP (Model Context Protocol) server for Solo.
4//!
5//! Exposes eighteen tools to MCP clients (Claude Desktop, Cursor, etc.):
6//!
7//! Episode tools (v0.1+, with v0.9.2 additions):
8//!   - `memory_remember(content, source_type?, source_id?, salience?)` —
9//!     store an episode. Returns the new MemoryId. `salience` (v0.9.2+)
10//!     is optional in [0.0, 1.0] and defaults to 0.5.
11//!   - `memory_remember_batch(items)` (v0.9.2+) — atomically store N
12//!     episodes in one writer-actor transaction. Each item has the
13//!     same fields as `memory_remember`. Returns an ordered array of
14//!     MemoryIds; either all items persist or none do.
15//!   - `memory_recall(query, limit?)` — vector search. Returns the top-K
16//!     matches with content + tier + status.
17//!   - `memory_context(query, subject?, window_days?, limit?)` — combined
18//!     recall + themes + facts + contradictions bundle for agent context.
19//!   - `memory_update(memory_id, content)` — correct/supersede an active
20//!     episode's content and refresh its embedding/index row.
21//!   - `memory_forget(memory_id, reason?)` — soft-delete an episode.
22//!   - `memory_inspect(memory_id)` — return the full episode record.
23//!
24//! Derived-layer tools (v0.4.0+):
25//!   - `memory_themes(window_days?, limit?)` — list cluster themes.
26//!   - `memory_facts_about(subject, ...)` — query the structured-fact
27//!     knowledge graph (subject-predicate-object triples).
28//!   - `memory_entities(query, limit?)` — discover entity ids from the
29//!     structured-fact graph.
30//!   - `memory_contradictions(limit?)` — disagreements flagged during
31//!     consolidation.
32//!   - `memory_contradiction_resolve(...)` — mark a contradiction resolved,
33//!     unresolved, or reopened.
34//!
35//! Derived-layer tools (v0.5.0+):
36//!   - `memory_inspect_cluster(cluster_id, full_content?)` — drill
37//!     into one cluster's abstraction + source episodes (truncated).
38//!
39//! Document tools (v0.7.0+):
40//!   - `memory_ingest_document(path)` — read a file from disk, split it
41//!     into chunks, embed each, and store under documents/document_chunks.
42//!   - `memory_search_docs(query, limit?)` — vector search restricted to
43//!     document chunks; returns chunk content + parent-doc context.
44//!   - `memory_inspect_document(doc_id)` — show one document's metadata
45//!     plus a previewed list of its chunks.
46//!   - `memory_list_documents(limit?, offset?, include_forgotten?)` —
47//!     paginate over ingested documents, newest first.
48//!   - `memory_forget_document(doc_id)` — soft-delete a document; chunks
49//!     stop appearing in `memory_search_docs` and tombstone in HNSW.
50//!
51//! ## Transport
52//!
53//! `serve_stdio` wires the server to stdin/stdout for use as a subprocess
54//! ("`claude_desktop_config.json` or `~/.cursor/mcp.json` invokes
55//! `solo mcp-stdio`"). The function awaits a graceful shutdown when stdin
56//! closes (parent disconnects) — same lifecycle as `solo daemon`'s
57//! Ctrl+C path.
58//!
59//! ## What's deferred
60//!
61//! - SSE/HTTP transports — `rmcp` ships them, but v0.1 ships stdio only.
//! - `prompts/` and `resources/` capabilities — not needed for the
//!   current tools-only surface; ServerHandler defaults return empty lists.
64//! - Tool argument validation beyond JSON Schema typing — we trust rmcp
65//!   to deserialize per the schema, then serde-deserialize into our
66//!   typed param structs. Bad inputs surface as clear errors.
67
68use std::sync::Arc;
69
70use rmcp::handler::server::ServerHandler;
71use rmcp::model::{
72    CallToolRequestParams as CallToolRequestParam, CallToolResult, Content, Implementation,
73    InitializeRequestParams, InitializeResult, ListToolsResult,
74    PaginatedRequestParams as PaginatedRequestParam, ProtocolVersion, ServerCapabilities,
75    ServerInfo, Tool,
76};
77use rmcp::service::{RequestContext, RoleServer};
78use rmcp::{ErrorData as McpError, ServiceExt};
79use serde::{Deserialize, Serialize};
80use solo_core::{Confidence, DocumentId, EncodingContext, Episode, MemoryId, Tier};
81use solo_storage::{TenantHandle, TenantRegistry};
82use std::str::FromStr;
83
84/// The MCP server. Cheap to clone — every field is `Arc`-cloneable.
85///
86/// v0.8.0 P2: an MCP session resolves to **one tenant**. The session's
87/// `tenant_handle` is resolved at `initialize` time (today: from the
88/// CLI invocation via `solo mcp-stdio --tenant <id>`; future versions
89/// may resolve per-bearer-token via OIDC). Subsequent `tools/call`
90/// invocations route through the cached handle without re-resolving.
91/// Operators that need multi-tenant MCP spawn one `solo mcp-stdio`
92/// subprocess per tenant.
93#[derive(Clone)]
94pub struct SoloMcpServer {
95    inner: Arc<Inner>,
96}
97
98struct Inner {
99    /// Multi-tenant registry shared across all sessions. Held so that a
100    /// future MCP capability that lists/inspects other tenants has a
101    /// path to them (out of scope for v0.8.0 P2). P3 (auth) will use
102    /// this to re-resolve the tenant from a bearer-token claim.
103    #[allow(dead_code)]
104    registry: Arc<TenantRegistry>,
105    /// The tenant this MCP session speaks for. Resolved at session
106    /// construction time.
107    tenant: Arc<TenantHandle>,
108    /// Read-path aliases for the canonical `"user"` subject. Sourced
109    /// from `solo.config.toml` `[identity] user_aliases`; threaded
110    /// through to `solo_query::facts_about` so a query for `"alex"`
111    /// also surfaces rows historically extracted as `"user"`. Empty
112    /// vec = behave as today (no expansion).
113    user_aliases: Vec<String>,
114    /// v0.8.0 P4 audit-log principal for this MCP session. MCP is
115    /// bearer-only (no OIDC story in the spec), so the principal is
116    /// effectively `"bearer"` when the daemon was started with
117    /// `--bearer-token-file` and `None` otherwise. Persisted here so
118    /// every tool dispatch threads it into the audit emit without
119    /// reconstructing it per call.
120    audit_principal: Option<String>,
121}
122
/// v0.9.0 P2: what MCP `initialize` should do, given the tenant's
/// `[llm]` configuration and whether the connecting peer advertised
/// the `sampling` capability.
///
/// The decision is modelled as plain data, separate from the code that
/// acts on it, so the gating table can be unit-tested without building
/// an `rmcp::Peer<RoleServer>`. `SoloMcpServer::initialize` matches on
/// this value and performs the corresponding side effect.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InitializeDecision {
    /// The configured LLM backend does not need an MCP peer.
    /// `initialize` succeeds and leaves the steward slot untouched.
    Allow,
    /// The backend needs an MCP peer and the peer advertised
    /// `sampling`: `initialize` installs a peer-bound Steward via
    /// `populate_sampling_steward`.
    PopulateSamplingSteward,
    /// The backend needs an MCP peer but the peer did not advertise
    /// `sampling`: `initialize` must fail with the locked BLOCKER 2
    /// error message.
    RejectMissingSamplingCapability,
}
147
148/// v0.9.0 P2: decide the initialize outcome given the tenant's
149/// `[llm]` config and whether the peer advertised the `sampling`
150/// capability.
151///
152/// Pure function — no side effects, no rmcp peer required. Pinned by
153/// `initialize_decision_*` tests.
154pub fn initialize_decision(
155    llm_settings: &Option<solo_storage::LlmSettings>,
156    peer_sampling_supported: bool,
157) -> InitializeDecision {
158    match llm_settings {
159        Some(settings) if settings.requires_mcp_peer() => {
160            if peer_sampling_supported {
161                InitializeDecision::PopulateSamplingSteward
162            } else {
163                InitializeDecision::RejectMissingSamplingCapability
164            }
165        }
166        _ => InitializeDecision::Allow,
167    }
168}
169
/// v0.9.0 P2: the locked error text used when the `mcp_sampling` LLM
/// backend is configured but no sampling-capable MCP peer is available
/// (plan §3 Decision 4 / BLOCKER 2 resolution).
///
/// The text is returned verbatim to the operator, so the indented TOML
/// snippets must stay copy-pasteable. It is defined once at module
/// scope so that the daemon-startup guard (in `solo-cli`) and the MCP
/// `initialize` gate share one wording.
///
/// Returns the lines below joined with `\n`, without a trailing
/// newline.
pub fn sampling_capability_missing_error_message() -> String {
    // One entry per output line; empty entries are blank lines.
    const MESSAGE_LINES: &[&str] = &[
        "LLM backend `mcp_sampling` requires a connected MCP client that",
        "advertises the `sampling` capability at initialize. Either the",
        "current MCP client does not support sampling, or this Solo",
        "process is running in daemon-only mode (no peer to call back).",
        "",
        "Pick one of:",
        "",
        "  # Anthropic (hosted):",
        "  [llm]",
        "  mode = \"anthropic\"",
        "  api_key_env = \"ANTHROPIC_API_KEY\"",
        "  model = \"claude-sonnet-4-6\"",
        "",
        "  # OpenAI (hosted):",
        "  [llm]",
        "  mode = \"openai\"",
        "  api_key_env = \"OPENAI_API_KEY\"",
        "  model = \"gpt-5o\"",
        "",
        "  # Ollama (local daemon):",
        "  [llm]",
        "  mode = \"ollama\"",
        "  base_url = \"http://localhost:11434\"",
        "  model = \"qwen3-coder:30b\"",
        "",
        "  # None (cluster-only; abstractions skipped):",
        "  [llm]",
        "  mode = \"none\"",
        "",
        "See docs/releases/v0.9.0.md \u{00a7}LLM-backend selection for details.",
    ];

    let mut message = String::new();
    for (index, line) in MESSAGE_LINES.iter().enumerate() {
        // Separator goes between lines only, never after the last one.
        if index > 0 {
            message.push('\n');
        }
        message.push_str(line);
    }
    message
}
214
/// v0.8.1 P2: name of the environment variable an MCP client sets when
/// launching the server process so that audit rows on the stdio
/// transport can be attributed. Closes the v0.8.0 known-issue gap where
/// MCP audit rows always carried `principal_subject = NULL` on the
/// daemon path.
///
/// Precedence (once an HTTP-MCP transport exists):
///   1. `Authorization: Bearer <token>` header on the HTTP-MCP request.
///   2. `SOLO_MCP_PRINCIPAL_TOKEN` env var on the spawned process.
///
/// In the stdio-only world only the env-var path applies. The constant
/// is public so callers (CLI subcommand, tests) reference it by name
/// instead of repeating the string literal.
pub const ENV_MCP_PRINCIPAL_TOKEN: &str = "SOLO_MCP_PRINCIPAL_TOKEN";

/// v0.8.1 P2: resolve the audit principal for an MCP session.
///
/// # Parameters
///
/// * `header_value` — the raw `Authorization` header value when the
///   caller has one (future HTTP-MCP transport); `None` on stdio.
///
/// # Returns
///
/// * The trimmed bearer credentials from `header_value` when it has the
///   form `<scheme> <credentials>`, the scheme is `Bearer` (compared
///   case-insensitively), and the credentials are non-blank.
/// * Otherwise the trimmed value of [`ENV_MCP_PRINCIPAL_TOKEN`], when
///   that variable is set, valid Unicode, and non-blank.
/// * `None` when neither source yields a value. Blank values count as
///   absent so an accidental `SOLO_MCP_PRINCIPAL_TOKEN=""` in a launcher
///   script does not pin every audit row to an empty principal.
///
/// The resolved value is used directly as the principal subject; it is
/// not validated against any configured token here (the env var is
/// operator-trusted, same trust model as `SOLO_PASSPHRASE`).
pub fn resolve_mcp_principal(header_value: Option<&str>) -> Option<String> {
    // The header, when present and usable, beats the environment.
    if let Some(token) = header_value.and_then(parse_bearer_credentials) {
        return Some(token.to_owned());
    }

    // Stdio fallback. An unset or non-Unicode variable resolves to `None`.
    let raw = std::env::var(ENV_MCP_PRINCIPAL_TOKEN).ok()?;
    let subject = raw.trim();
    (!subject.is_empty()).then(|| subject.to_owned())
}

/// Extract the trimmed, non-empty credentials from an
/// `Authorization`-style value that uses the `Bearer` scheme.
///
/// HTTP authentication scheme names are case-insensitive, so `bearer`
/// and `BEARER` are accepted as well as `Bearer`. A single space must
/// separate the scheme from the credentials.
fn parse_bearer_credentials(header: &str) -> Option<&str> {
    let (scheme, credentials) = header.split_once(' ')?;
    if !scheme.eq_ignore_ascii_case("Bearer") {
        return None;
    }
    let token = credentials.trim();
    (!token.is_empty()).then_some(token)
}
275
276impl SoloMcpServer {
277    /// Build a server speaking for `tenant` (v0.8.0 P2 — one MCP session
278    /// ↔ one tenant). The registry is held so future capabilities can
279    /// reach across tenants if needed; today every handler routes
280    /// through `self.inner.tenant`.
281    ///
282    /// v0.8.1 P2: auto-resolves the audit principal from the
283    /// `SOLO_MCP_PRINCIPAL_TOKEN` env var (see [`resolve_mcp_principal`]).
284    /// When neither the env var nor a header is set, the principal stays
285    /// `None` — preserving v0.8.0 behavior for single-user setups.
286    pub fn new_for_tenant(
287        registry: Arc<TenantRegistry>,
288        tenant: Arc<TenantHandle>,
289        user_aliases: Vec<String>,
290    ) -> Self {
291        let principal = resolve_mcp_principal(None);
292        Self::new_for_tenant_with_principal(registry, tenant, user_aliases, principal)
293    }
294
295    /// v0.8.0 P4: like [`Self::new_for_tenant`], but records an explicit
296    /// audit principal subject for every tool dispatch. MCP is
297    /// bearer-only at v0.8.0 — the orchestration layer (today: the
298    /// daemon's `--bearer-token-file` path) decides whether a session
299    /// counts as "bearer-authenticated" and passes `Some("bearer")`;
300    /// CLI / unauth paths pass `None`.
301    ///
302    /// v0.8.1 P2: when the caller passes `audit_principal = None`, the
303    /// env-var auto-resolution still runs (in `new_for_tenant`). Callers
304    /// who want to *explicitly* suppress env-var resolution can call
305    /// this method with `None` after `std::env::remove_var(...)`, or use
306    /// the dedicated test constructor that bypasses env reads.
307    pub fn new_for_tenant_with_principal(
308        registry: Arc<TenantRegistry>,
309        tenant: Arc<TenantHandle>,
310        user_aliases: Vec<String>,
311        audit_principal: Option<String>,
312    ) -> Self {
313        Self {
314            inner: Arc::new(Inner {
315                registry,
316                tenant,
317                user_aliases,
318                audit_principal,
319            }),
320        }
321    }
322}
323
324/// Convenience: run the server over stdio and await its termination.
325/// Returns when stdin closes (parent disconnect) or the runtime exits.
326pub async fn serve_stdio(server: SoloMcpServer) -> anyhow::Result<()> {
327    use rmcp::transport::io::stdio;
328    let (stdin, stdout) = stdio();
329    let running = server.serve((stdin, stdout)).await?;
330    running.waiting().await?;
331    Ok(())
332}
333
334// ---------------------------------------------------------------------------
335// Tool argument schemas
336// ---------------------------------------------------------------------------
337
338#[derive(Debug, Clone, Serialize, Deserialize)]
339pub struct RememberArgs {
340    pub content: String,
341    #[serde(default)]
342    pub source_type: Option<String>,
343    #[serde(default)]
344    pub source_id: Option<String>,
345    /// v0.9.2 — optional salience in [0.0, 1.0]. `None` → 0.5 (preserves
346    /// pre-v0.9.2 behaviour). Out-of-range values are rejected by
347    /// [`Self::validate_salience`] before reaching the writer.
348    #[serde(default)]
349    pub salience: Option<f32>,
350}
351
352/// v0.9.2 — one item in a `memory_remember_batch` request.
353///
354/// Mirrors [`RememberArgs`] field-for-field minus the wrapper-tool
355/// invariant: callers pass an array of these inside [`RememberBatchArgs`].
356/// All items in a batch are persisted in a single `BEGIN IMMEDIATE`
357/// transaction (per dev-log 0120 §3 Decision A) so partial-failure
358/// scenarios are impossible from the client's perspective — either
359/// every item lands or none do.
360#[derive(Debug, Clone, Serialize, Deserialize)]
361pub struct RememberItem {
362    pub content: String,
363    #[serde(default)]
364    pub source_type: Option<String>,
365    #[serde(default)]
366    pub source_id: Option<String>,
367    /// Optional salience in [0.0, 1.0]; `None` → 0.5. See
368    /// [`RememberArgs::salience`].
369    #[serde(default)]
370    pub salience: Option<f32>,
371}
372
373/// v0.9.2 — args for the new `memory_remember_batch` MCP tool.
374///
375/// Wraps `Vec<RememberItem>`. The handler validates `items.is_empty()`
376/// and `items.len() > MAX_REMEMBER_BATCH_SIZE` before any embedding
377/// work; per-item content/salience is validated immediately afterwards.
378#[derive(Debug, Clone, Serialize, Deserialize)]
379pub struct RememberBatchArgs {
380    pub items: Vec<RememberItem>,
381}
382
383/// Validate that an optional salience value is well-formed (NaN-free
384/// and inside `[0.0, 1.0]`). Centralised so both `memory_remember` and
385/// `memory_remember_batch` share the same rejection shape.
386fn validate_salience(salience: Option<f32>) -> std::result::Result<(), McpError> {
387    if let Some(s) = salience {
388        if !s.is_finite() || !(0.0..=1.0).contains(&s) {
389            return Err(McpError::invalid_params(
390                format!("salience must be in [0.0, 1.0]; got {s}"),
391                None,
392            ));
393        }
394    }
395    Ok(())
396}
397
398#[derive(Debug, Clone, Serialize, Deserialize)]
399pub struct RecallArgs {
400    pub query: String,
401    #[serde(default = "default_limit")]
402    pub limit: usize,
403}
404
405#[derive(Debug, Clone, Serialize, Deserialize)]
406pub struct MemoryContextArgs {
407    pub query: String,
408    #[serde(default)]
409    pub subject: Option<String>,
410    #[serde(default)]
411    pub window_days: Option<i64>,
412    #[serde(default = "default_limit")]
413    pub limit: usize,
414}
415
/// Serde default for the `limit` field of the argument structs that
/// name this function in `#[serde(default = "default_limit")]`.
fn default_limit() -> usize {
    const DEFAULT_RESULT_LIMIT: usize = 5;
    DEFAULT_RESULT_LIMIT
}
419
420#[derive(Debug, Clone, Serialize, Deserialize)]
421pub struct ForgetArgs {
422    pub memory_id: String,
423    #[serde(default = "default_forget_reason")]
424    pub reason: String,
425}
426
/// Serde default for [`ForgetArgs::reason`].
fn default_forget_reason() -> String {
    String::from("user-initiated via MCP")
}
430
431#[derive(Debug, Clone, Serialize, Deserialize)]
432pub struct InspectArgs {
433    pub memory_id: String,
434}
435
436#[derive(Debug, Clone, Serialize, Deserialize)]
437pub struct UpdateArgs {
438    pub memory_id: String,
439    pub content: String,
440}
441
442// Path 1 derived-layer tools (v0.4.0+) — query the Steward's outputs.
443// `solo_query::derived` is the single source of truth; these handlers
444// just translate JSON args to function args and serialise the result
445// vec to JSON for the MCP wire.
446
447#[derive(Debug, Clone, Serialize, Deserialize)]
448pub struct ThemesArgs {
449    /// Optional time window in days; `None` = unfiltered, return up
450    /// to `limit` most-recent themes across all time. `Some(7)` =
451    /// "themes from the last week".
452    #[serde(default)]
453    pub window_days: Option<i64>,
454    #[serde(default = "default_limit")]
455    pub limit: usize,
456}
457
458#[derive(Debug, Clone, Serialize, Deserialize)]
459pub struct FactsAboutArgs {
460    /// Subject id to query — required (predicate-only scans
461    /// intentionally not supported).
462    pub subject: String,
463    #[serde(default)]
464    pub predicate: Option<String>,
465    #[serde(default)]
466    pub since_ms: Option<i64>,
467    #[serde(default)]
468    pub until_ms: Option<i64>,
469    /// v0.5.1 Priority 8 — widen the query to also match rows where
470    /// `subject` appears as the object (e.g. surface "Sam pushes back
471    /// on PRs about Maya" under `facts_about(subject="maya")`).
472    /// Default `false` preserves v0.5.0 behaviour.
473    #[serde(default)]
474    pub include_as_object: bool,
475    #[serde(default = "default_limit")]
476    pub limit: usize,
477}
478
479#[derive(Debug, Clone, Serialize, Deserialize)]
480pub struct EntitiesArgs {
481    pub query: String,
482    #[serde(default = "default_limit")]
483    pub limit: usize,
484}
485
486#[derive(Debug, Clone, Serialize, Deserialize)]
487pub struct ContradictionsArgs {
488    #[serde(default = "default_limit")]
489    pub limit: usize,
490}
491
/// Serde default for [`ContradictionResolveArgs::status`].
fn default_contradiction_status() -> String {
    String::from("resolved")
}
495
496#[derive(Debug, Clone, Serialize, Deserialize)]
497pub struct ContradictionResolveArgs {
498    pub a_id: String,
499    pub b_id: String,
500    pub kind: String,
501    #[serde(default = "default_contradiction_status")]
502    pub status: String,
503    #[serde(default)]
504    pub resolution_note: Option<String>,
505    #[serde(default)]
506    pub winning_triple_id: Option<String>,
507}
508
509/// Args for `memory_inspect_cluster` (v0.5.0 Priority 3). `cluster_id`
510/// is required; `full_content` is opt-in for the rare power-user case
511/// where 200-char-per-episode truncation is too aggressive.
512#[derive(Debug, Clone, Serialize, Deserialize)]
513pub struct InspectClusterArgs {
514    pub cluster_id: String,
515    /// If `true`, episode `content` fields are returned verbatim. If
516    /// `false` or omitted (the default), each episode's content is
517    /// truncated to `solo_query::EPISODE_TRUNCATE_CHARS` chars with a
518    /// trailing `…`.
519    #[serde(default)]
520    pub full_content: bool,
521}
522
523// Document tools (v0.7.0+). Five args structs paired with five handlers.
524// Wire shapes per `docs/dev-log/0083-v0.7.0-implementation-plan.md` §2 P5.
525
526#[derive(Debug, Clone, Serialize, Deserialize)]
527pub struct IngestDocumentArgs {
528    /// Server-side filesystem path to the file to ingest. Must be
529    /// readable by the Solo process. The writer parses the file by
530    /// extension, splits it into ~500-token chunks, embeds each, and
531    /// stores them under `documents` + `document_chunks`.
532    pub path: String,
533}
534
535#[derive(Debug, Clone, Serialize, Deserialize)]
536pub struct SearchDocsArgs {
537    pub query: String,
538    #[serde(default = "default_search_docs_limit")]
539    pub limit: usize,
540}
541
/// Serde default for [`SearchDocsArgs::limit`].
fn default_search_docs_limit() -> usize {
    const DEFAULT_DOC_SEARCH_LIMIT: usize = 5;
    DEFAULT_DOC_SEARCH_LIMIT
}
545
546#[derive(Debug, Clone, Serialize, Deserialize)]
547pub struct InspectDocumentArgs {
548    pub doc_id: String,
549}
550
551#[derive(Debug, Clone, Serialize, Deserialize)]
552pub struct ListDocumentsArgs {
553    #[serde(default = "default_list_documents_limit")]
554    pub limit: usize,
555    #[serde(default)]
556    pub offset: usize,
557    /// If `true`, also include documents the user has forgotten. Default
558    /// `false` matches the agent-UX expectation that recall + listing
559    /// ignore soft-deleted rows.
560    #[serde(default)]
561    pub include_forgotten: bool,
562}
563
/// Serde default for [`ListDocumentsArgs::limit`].
fn default_list_documents_limit() -> usize {
    const DEFAULT_DOCUMENT_PAGE_SIZE: usize = 20;
    DEFAULT_DOCUMENT_PAGE_SIZE
}
567
568#[derive(Debug, Clone, Serialize, Deserialize)]
569pub struct ForgetDocumentArgs {
570    pub doc_id: String,
571}
572
573// ---------------------------------------------------------------------------
574// ServerHandler implementation
575// ---------------------------------------------------------------------------
576
577impl ServerHandler for SoloMcpServer {
578    fn get_info(&self) -> ServerInfo {
579        // rmcp 1.x: ServerInfo is non-exhaustive AND lives in another crate,
580        // so neither struct-literal nor functional-update syntax (..) is
581        // allowed from outside. Build via mut on a Default::default().
582        let capabilities = ServerCapabilities::builder().enable_tools().build();
583        let mut info = ServerInfo::default();
584        info.protocol_version = ProtocolVersion::default();
585        info.capabilities = capabilities;
586        // v0.9.1 P1 Fix 1 — `Implementation::from_build_env()` reads
587        // `CARGO_PKG_NAME` + `CARGO_PKG_VERSION` from rmcp's OWN build
588        // environment (the helper lives in rmcp, so the proc-macro
589        // expansion captures rmcp's manifest, not ours). On v0.9.0 every
590        // Solo MCP daemon self-identified as `{name: "rmcp", version: "1.7.0"}`.
591        // Pinned by `tests::server_info_identity_is_solo_not_rmcp_or_solo_api`.
592        // The literal `"solo"` (not `env!("CARGO_PKG_NAME")`) is deliberate:
593        // this crate is `solo-api`, but the operator-facing identity is
594        // the binary name `solo`.
595        info.server_info =
596            Implementation::new("solo".to_string(), env!("CARGO_PKG_VERSION").to_string());
597        info.instructions = Some(
598            "Solo gives you persistent memory across conversations \
599                 with this user — what they've told you before, the \
600                 people and projects in their life, and where their \
601                 stated beliefs have shifted, plus a library of \
602                 documents the user has ingested (notes, runbooks, \
603                 PDFs). Reach for these tools whenever the user \
604                 references something from earlier (\"like I \
605                 mentioned\", \"the project I'm working on\", \"my \
606                 friend Alex\", \"the notes I uploaded last week\") \
607                 or asks a question that hinges on personal context \
608                 or document content you don't have in the current \
609                 chat. \
610                 \n\nBest first call for agent work: memory_context \
611                 (one bounded bundle containing recall, themes, \
612                 optional facts, and contradictions). Use the \
613                 narrower tools below when you need more detail or \
614                 a specific operation. \
615                 \n\nTools to write or look up specific moments: \
616                 memory_remember (save something worth keeping), \
617                 memory_update (correct one active saved item), \
618                 memory_recall (search past conversations by topic), \
619                 memory_inspect (show one saved item by id), \
620                 memory_forget (delete one saved item). \
621                 \n\nTools for the bigger picture (populated as the \
622                 user uses Solo over time): memory_themes (recent \
623                 topics they've been thinking about), \
624                 memory_facts_about (what you know about a person, \
625                 project, or place — \"what do you know about \
626                 Alex?\"), memory_entities (discover graph entity \
627                 ids by name), memory_contradictions (places where the \
628                 user has said two things that disagree — surface \
629                 these before answering), memory_contradiction_resolve \
630                 (mark a contradiction resolved or reopened), \
631                 memory_inspect_cluster \
632                 (the raw conversations behind one summary). \
633                 \n\nTools for the user's documents: \
634                 memory_ingest_document (read a file from disk and \
635                 add it to Solo's library), memory_search_docs \
636                 (search across ingested documents by topic — use \
637                 when the user asks about something they wrote down \
638                 or saved as a file), memory_inspect_document (show \
639                 one document's metadata plus a preview of its \
640                 chunks), memory_list_documents (browse documents \
641                 by recency), memory_forget_document (drop a \
642                 document from the library)."
643                .into(),
644        );
645        info
646    }
647
648    /// v0.9.0 P2: override `initialize` so we can:
649    ///
650    ///   1. Cache the client's `InitializeRequestParams` on the peer
651    ///      (delegates to rmcp's default for this).
652    ///   2. If the tenant's `[llm] mode = "mcp_sampling"`:
653    ///      a. Refuse to initialize when the peer didn't advertise the
654    ///         `sampling` capability — surfaces the BLOCKER 2-locked
655    ///         error message so the user sees commented-out
656    ///         alternative TOML blocks.
657    ///      b. Otherwise build a `SamplingLlmClient`-backed Steward and
658    ///         write it into `tenant.steward_slot()` so the writer
659    ///         actor's next consolidate-tick reads a populated slot.
660    ///   3. For any other `[llm]` mode, return the configured tools
661    ///      surface unchanged (the slot was eagerly populated at
662    ///      registry-open time by the static StewardFactory).
663    async fn initialize(
664        &self,
665        request: InitializeRequestParams,
666        context: RequestContext<RoleServer>,
667    ) -> std::result::Result<InitializeResult, McpError> {
668        // Defer to rmcp's default for peer-info caching (matches the
669        // `if peer_info().is_none()` shape).
670        if context.peer.peer_info().is_none() {
671            context.peer.set_peer_info(request.clone());
672        }
673
674        let llm_settings = self.inner.tenant.config().llm.as_ref().cloned();
675        let peer_sampling_supported = request.capabilities.sampling.is_some();
676        match initialize_decision(&llm_settings, peer_sampling_supported) {
677            InitializeDecision::Allow => {}
678            InitializeDecision::PopulateSamplingSteward => {
679                // Build the sampling-backed Steward against the live
680                // peer + the per-tenant write handle, then write it
681                // into the slot.
682                self.populate_sampling_steward(&context).await;
683            }
684            InitializeDecision::RejectMissingSamplingCapability => {
685                return Err(McpError::invalid_request(
686                    sampling_capability_missing_error_message(),
687                    None,
688                ));
689            }
690        }
691
692        Ok(self.get_info())
693    }
694
695    async fn list_tools(
696        &self,
697        _request: Option<PaginatedRequestParam>,
698        _context: RequestContext<RoleServer>,
699    ) -> std::result::Result<ListToolsResult, McpError> {
700        Ok(ListToolsResult {
701            tools: build_tools(),
702            next_cursor: None,
703            ..Default::default()
704        })
705    }
706
707    async fn call_tool(
708        &self,
709        request: CallToolRequestParam,
710        _context: RequestContext<RoleServer>,
711    ) -> std::result::Result<CallToolResult, McpError> {
712        let CallToolRequestParam {
713            name, arguments, ..
714        } = request;
715        let args_value = serde_json::Value::Object(arguments.unwrap_or_default());
716        // v0.11.0 P3: stdio transport has no per-session broadcast
717        // channel to publish progress events through (one process =
718        // one tenant = one implicit "session" for the subprocess's
719        // lifetime). Pass `None` — handlers see it and skip the
720        // emission code paths silently.
721        self.dispatch_tool(&name, args_value, None).await
722    }
723}
724
725impl SoloMcpServer {
726    /// v0.9.0 P2: build a sampling-backed `Arc<Steward>` for the
727    /// current MCP session and write it into the tenant's
728    /// `steward_slot`. Called from [`Self::initialize`] when:
729    ///
730    ///   * `tenant.config().llm.requires_mcp_peer()` is true, AND
731    ///   * the peer advertised the `sampling` capability.
732    ///
733    /// Implementation notes:
734    ///
735    ///   * `StewardConfig::from_settings_then_env()` is parsed best-
736    ///     effort against the tenant's `SoloConfig.steward` block (the
737    ///     v0.11.1 TOML surface) AND env vars; if either is malformed,
738    ///     we fall back to `default()` and log a warning. This matches
739    ///     `daemon.rs`'s tolerance — a bad config shouldn't block an
740    ///     MCP session from initialising.
741    ///
742    ///   * The slot is OVERWRITTEN unconditionally — a fresh MCP
743    ///     session always wins. If a prior session's
744    ///     `SamplingLlmClient` had outstanding requests, they error out
745    ///     on the rmcp layer when their peer drops.
746    ///
747    ///   * The cached `audit_principal` is the one the MCP server
748    ///     constructed for this session via `resolve_mcp_principal`.
749    ///     Every `peer.create_message` call from this Steward routes
750    ///     that principal through to the per-tenant
751    ///     `AuditOperation::LlmSamplingCall` row.
752    async fn populate_sampling_steward(&self, context: &RequestContext<RoleServer>) {
753        // v0.11.1: read `[steward]` TOML overrides from the tenant's
754        // already-parsed `SoloConfig` and layer env vars on top, the
755        // same resolution order as `daemon.rs` + `common.rs`. Best-
756        // effort: a malformed value falls back to defaults rather than
757        // blocking session init.
758        let tenant_cfg = self.inner.tenant.config();
759        let steward_config = solo_steward::StewardConfig::from_settings_then_env(
760            tenant_cfg.steward.cluster_min_size,
761            tenant_cfg.steward.cluster_cosine_threshold,
762        )
763        .unwrap_or_else(|e| {
764            tracing::warn!(
765                error = %e,
766                "v0.11.1: StewardConfig::from_settings_then_env failed at MCP \
767                 initialize; falling back to defaults"
768            );
769            solo_steward::StewardConfig::default()
770        });
771        // v0.9.0 P5 (M3 wiring): read `[sampling]` from the tenant's
772        // already-parsed `SoloConfig`. `SamplingConfig::default()` lands
773        // when the block is omitted (5s window / 10 max-batch); operator
774        // overrides flow through to `build_sampling_steward` and into
775        // `SamplingCoordinator::with_settings`.
776        let sampling_config = self.inner.tenant.config().sampling.clone();
777        let peer = context.peer.clone();
778        let write_handle = self.inner.tenant.write().clone();
779        let steward = crate::llm::build_sampling_steward(
780            peer,
781            write_handle,
782            self.inner.audit_principal.clone(),
783            steward_config,
784            sampling_config.clone(),
785        );
786        let slot = self.inner.tenant.steward_slot();
787        let mut guard = slot.write().await;
788        *guard = Some(steward);
789        tracing::info!(
790            tenant = %self.inner.tenant.tenant_id(),
791            coalesce_window_ms = sampling_config.coalesce_window_ms,
792            coalesce_max_requests = sampling_config.coalesce_max_requests,
793            "v0.9.0 P5: MCP-sampling Steward attached to tenant.steward_slot \
794             (PeerSamplingClient → SamplingCoordinator → SamplingLlmClient)"
795        );
796    }
797
798    /// Direct tool-dispatch path used by both `call_tool` (the
799    /// ServerHandler trait method, behind the rmcp protocol layer) and
800    /// in-process tests that don't want to spin up a full transport pair.
801    /// Bypasses `RequestContext` (which requires a `Peer` not constructible
802    /// outside rmcp internals).
803    ///
804    /// v0.11.0 P3: `progress` is `Some` only when the HTTP transport
805    /// dispatched the request AND the client opted in via
806    /// `_meta.progressToken`. The three long-running handlers
807    /// (`memory_ingest_document`, `memory_search_docs`,
808    /// `memory_remember_batch`) consult the reporter; the other
809    /// 11 handlers ignore it (backward compat with stdio and with
810    /// HTTP clients that did not opt in).
811    pub async fn dispatch_tool(
812        &self,
813        name: &str,
814        args_value: serde_json::Value,
815        progress: Option<crate::mcp_progress::ProgressReporter>,
816    ) -> std::result::Result<CallToolResult, McpError> {
817        match name {
818            "memory_remember" => {
819                let args: RememberArgs = parse_args(&args_value)?;
820                self.handle_remember(args).await
821            }
822            "memory_remember_batch" => {
823                let args: RememberBatchArgs = parse_args(&args_value)?;
824                self.handle_remember_batch(args, progress).await
825            }
826            "memory_recall" => {
827                let args: RecallArgs = parse_args(&args_value)?;
828                self.handle_recall(args).await
829            }
830            "memory_context" => {
831                let args: MemoryContextArgs = parse_args(&args_value)?;
832                self.handle_memory_context(args).await
833            }
834            "memory_forget" => {
835                let args: ForgetArgs = parse_args(&args_value)?;
836                self.handle_forget(args).await
837            }
838            "memory_inspect" => {
839                let args: InspectArgs = parse_args(&args_value)?;
840                self.handle_inspect(args).await
841            }
842            "memory_update" => {
843                let args: UpdateArgs = parse_args(&args_value)?;
844                self.handle_update(args).await
845            }
846            "memory_themes" => {
847                let args: ThemesArgs = parse_args(&args_value)?;
848                self.handle_themes(args).await
849            }
850            "memory_facts_about" => {
851                let args: FactsAboutArgs = parse_args(&args_value)?;
852                self.handle_facts_about(args).await
853            }
854            "memory_entities" => {
855                let args: EntitiesArgs = parse_args(&args_value)?;
856                self.handle_entities(args).await
857            }
858            "memory_contradictions" => {
859                let args: ContradictionsArgs = parse_args(&args_value)?;
860                self.handle_contradictions(args).await
861            }
862            "memory_contradiction_resolve" => {
863                let args: ContradictionResolveArgs = parse_args(&args_value)?;
864                self.handle_contradiction_resolve(args).await
865            }
866            "memory_inspect_cluster" => {
867                let args: InspectClusterArgs = parse_args(&args_value)?;
868                self.handle_inspect_cluster(args).await
869            }
870            "memory_ingest_document" => {
871                let args: IngestDocumentArgs = parse_args(&args_value)?;
872                self.handle_ingest_document(args, progress).await
873            }
874            "memory_search_docs" => {
875                let args: SearchDocsArgs = parse_args(&args_value)?;
876                self.handle_search_docs(args, progress).await
877            }
878            "memory_inspect_document" => {
879                let args: InspectDocumentArgs = parse_args(&args_value)?;
880                self.handle_inspect_document(args).await
881            }
882            "memory_list_documents" => {
883                let args: ListDocumentsArgs = parse_args(&args_value)?;
884                self.handle_list_documents(args).await
885            }
886            "memory_forget_document" => {
887                let args: ForgetDocumentArgs = parse_args(&args_value)?;
888                self.handle_forget_document(args).await
889            }
890            other => Err(McpError::invalid_params(
891                format!("unknown tool `{other}`"),
892                None,
893            )),
894        }
895    }
896
897    /// List the tools this server exposes. Mirrors `ServerHandler::list_tools`
898    /// without requiring a RequestContext.
899    pub fn dispatch_list_tools(&self) -> Vec<Tool> {
900        build_tools()
901    }
902}
903
904fn parse_args<T: serde::de::DeserializeOwned>(
905    v: &serde_json::Value,
906) -> std::result::Result<T, McpError> {
907    serde_json::from_value(v.clone())
908        .map_err(|e| McpError::invalid_params(format!("invalid tool arguments: {e}"), None))
909}
910
911fn solo_to_mcp(e: solo_core::Error) -> McpError {
912    use solo_core::Error;
913    match e {
914        Error::NotFound(msg) => McpError::invalid_params(msg, None),
915        Error::InvalidInput(msg) => McpError::invalid_params(msg, None),
916        Error::Conflict(msg) => McpError::invalid_params(msg, None),
917        other => McpError::internal_error(other.to_string(), None),
918    }
919}
920
921// ---------------------------------------------------------------------------
922// Tool definitions (JSON Schema)
923// ---------------------------------------------------------------------------
924
925fn build_tools() -> Vec<Tool> {
926    vec![
927        Tool::new(
928            "memory_remember",
929            "Save something the user has told you — a fact, a \
930             preference, a name, a date, a context — so you can pick \
931             it up next conversation. Use whenever the user mentions \
932             something they'd reasonably expect you to recall later \
933             (\"I just started at Quotient\", \"my partner is Maya\"). \
934             Returns the saved item's id.",
935            json_schema_object(serde_json::json!({
936                "type": "object",
937                "properties": {
938                    "content": {
939                        "type": "string",
940                        "description": "The text to remember.",
941                    },
942                    "source_type": {
943                        "type": "string",
944                        "description": "Optional source-type tag (default: \"user_message\"). See docs/mcp/source-types.md for convention values.",
945                    },
946                    "source_id": {
947                        "type": "string",
948                        "description": "Optional upstream id for traceability.",
949                    },
950                    "salience": {
951                        "type": "number",
952                        "description": "Optional salience in [0.0, 1.0]; defaults to 0.5. Higher values bias toward recall ranking + retention. v0.9.2+.",
953                        "minimum": 0.0,
954                        "maximum": 1.0,
955                    },
956                },
957                "required": ["content"],
958            })),
959        ),
960        // v0.9.2 — atomic batched-remember for agentic clients. Wraps
961        // every item in one BEGIN IMMEDIATE tx so a single
962        // `memory_remember_batch` call either persists all N items or
963        // none. Designed for the solo-jarvis turn-flush pattern (per
964        // dev-log 0120 §1).
965        Tool::new(
966            "memory_remember_batch",
967            "Save several items atomically in one transaction — either \
968             every item lands or none does. Use this when you have a \
969             collection of related episodes from one logical step (a \
970             conversation turn, a tool-output bundle, an ingest batch) \
971             and partial success would leave the user's memory in a \
972             confusing half-state. Each item carries the same fields as \
973             memory_remember (content + optional source_type, source_id, \
974             salience). Returns an ordered array of memory_ids matching \
975             the input items. v0.9.2+.",
976            json_schema_object(serde_json::json!({
977                "type": "object",
978                "properties": {
979                    "items": {
980                        "type": "array",
981                        "description": format!(
982                            "Items to remember atomically. Max {} per call.",
983                            solo_storage::MAX_REMEMBER_BATCH_SIZE,
984                        ),
985                        "minItems": 1,
986                        // SOURCE OF TRUTH: solo_storage::MAX_REMEMBER_BATCH_SIZE.
987                        // Both the numeric `maxItems` and the human-readable
988                        // `description` above interpolate from this constant
989                        // so they can never drift. Pinned by
990                        // `remember_batch_maxitems_matches_max_batch_size`
991                        // in the test module.
992                        "maxItems": solo_storage::MAX_REMEMBER_BATCH_SIZE,
993                        "items": {
994                            "type": "object",
995                            "properties": {
996                                "content": {
997                                    "type": "string",
998                                    "description": "The text to remember.",
999                                },
1000                                "source_type": {
1001                                    "type": "string",
1002                                    "description": "Optional source-type tag (default: \"user_message\"). See docs/mcp/source-types.md.",
1003                                },
1004                                "source_id": {
1005                                    "type": "string",
1006                                    "description": "Optional upstream id for traceability.",
1007                                },
1008                                "salience": {
1009                                    "type": "number",
1010                                    "description": "Optional salience in [0.0, 1.0]; defaults to 0.5.",
1011                                    "minimum": 0.0,
1012                                    "maximum": 1.0,
1013                                },
1014                            },
1015                            "required": ["content"],
1016                        },
1017                    },
1018                },
1019                "required": ["items"],
1020            })),
1021        ),
1022        Tool::new(
1023            "memory_recall",
1024            "Search past conversations with this user by topic or \
1025             phrase. Returns up to `limit` of the closest matches, \
1026             best match first. Use when the user references \
1027             something they said before (\"that book I told you \
1028             about\", \"the bug we were debugging last week\"). \
1029             Skips items the user has deleted.",
1030            json_schema_object(serde_json::json!({
1031                "type": "object",
1032                "properties": {
1033                    "query": {
1034                        "type": "string",
1035                        "description": "The query text.",
1036                    },
1037                    "limit": {
1038                        "type": "integer",
1039                        "description": "Maximum results (default 5).",
1040                        "minimum": 1,
1041                        "maximum": 100,
1042                    },
1043                },
1044                "required": ["query"],
1045            })),
1046        ),
1047        Tool::new(
1048            "memory_context",
1049            "Build a compact working-memory bundle for an agent turn. \
1050             Use this near the start of a substantial answer or task \
1051             when remembered context may matter. It combines raw \
1052             episodic recall, recent themes, optional structured facts \
1053             about `subject`, and known contradictions so clients can \
1054             ground answers without making four separate calls.",
1055            json_schema_object(serde_json::json!({
1056                "type": "object",
1057                "properties": {
1058                    "query": {
1059                        "type": "string",
1060                        "description": "Natural-language query for episodic recall.",
1061                    },
1062                    "subject": {
1063                        "type": "string",
1064                        "description": "Optional subject for structured facts. When present, facts also match object-position references.",
1065                    },
1066                    "window_days": {
1067                        "type": "integer",
1068                        "description": "Optional recency window in days for themes. Omit for unfiltered.",
1069                        "minimum": 1,
1070                    },
1071                    "limit": {
1072                        "type": "integer",
1073                        "description": "Per-section maximum results (default 5).",
1074                        "minimum": 1,
1075                        "maximum": 100,
1076                    },
1077                },
1078                "required": ["query"],
1079            })),
1080        ),
1081        Tool::new(
1082            "memory_forget",
1083            "Delete one saved item by id. Use when the user asks you \
1084             to forget something specific (\"forget that I said \
1085             X\"). The item stops appearing in future recalls. \
1086             Reversible only via backups.",
1087            json_schema_object(serde_json::json!({
1088                "type": "object",
1089                "properties": {
1090                    "memory_id": {
1091                        "type": "string",
1092                        "description": "MemoryId to forget (UUID v7).",
1093                    },
1094                    "reason": {
1095                        "type": "string",
1096                        "description": "Optional free-form reason (logged, not yet persisted).",
1097                    },
1098                },
1099                "required": ["memory_id"],
1100            })),
1101        ),
1102        Tool::new(
1103            "memory_inspect",
1104            "Show the full record for one saved item — when it was \
1105             saved, where it came from, and the full text. Use after \
1106             memory_recall when you want the complete content of a \
1107             specific hit (recall results may be truncated).",
1108            json_schema_object(serde_json::json!({
1109                "type": "object",
1110                "properties": {
1111                    "memory_id": {
1112                        "type": "string",
1113                        "description": "MemoryId to inspect (UUID v7).",
1114                    },
1115                },
1116                "required": ["memory_id"],
1117            })),
1118        ),
1119        Tool::new(
1120            "memory_update",
1121            "Correct one active saved memory and refresh its embedding \
1122             and search index entry. Use when the user says a remembered \
1123             episode is wrong or outdated and provides the corrected \
1124             wording. Returns the updated memory id, rowid, content, and \
1125             timestamp.",
1126            json_schema_object(serde_json::json!({
1127                "type": "object",
1128                "properties": {
1129                    "memory_id": {
1130                        "type": "string",
1131                        "description": "MemoryId to update (UUID v7).",
1132                    },
1133                    "content": {
1134                        "type": "string",
1135                        "description": "Replacement content for the active memory.",
1136                        "minLength": 1,
1137                    },
1138                },
1139                "required": ["memory_id", "content"],
1140            })),
1141        ),
1142        // Path 1 derived-layer tools (v0.4.0+) — query the Steward's
1143        // outputs. These are populated by `solo consolidate` and were
1144        // previously unreadable except via direct SQL.
1145        Tool::new(
1146            "memory_themes",
1147            "Recent topics the user has been thinking about. Use to \
1148             orient yourself at the start of a conversation, or when \
1149             the user asks \"what have I been up to\" / \"what was I \
1150             working on last week\". Pass `window_days` to scope \
1151             (e.g. 7 for last week); omit for all-time.",
1152            json_schema_object(serde_json::json!({
1153                "type": "object",
1154                "properties": {
1155                    "window_days": {
1156                        "type": "integer",
1157                        "description": "Optional time window in days. Omit for unfiltered.",
1158                        "minimum": 1,
1159                    },
1160                    "limit": {
1161                        "type": "integer",
1162                        "description": "Maximum results (default 5).",
1163                        "minimum": 1,
1164                        "maximum": 100,
1165                    },
1166                },
1167                "required": [],
1168            })),
1169        ),
1170        Tool::new(
1171            "memory_facts_about",
1172            "Look up what you remember about a person, project, or \
1173             topic — names, dates, preferences, relationships. Use \
1174             when the user asks \"what do you know about Alex?\", \
1175             \"when did I start at Quotient?\", \"who is Maya?\", or \
1176             whenever you need grounded facts about someone or \
1177             something before answering. Subject is required (the \
1178             person/place/thing you're asking about); narrow further \
1179             with `predicate` (\"works_at\", \"lives_in\") or a date \
1180             range. Set `include_as_object=true` to also surface \
1181             facts where the subject appears on the receiving side of \
1182             a relationship (e.g. \"Sam pushes back on PRs about \
1183             Maya\" surfaces under facts_about(subject=\"Maya\", \
1184             include_as_object=true)). (Backed by \
1185             subject-predicate-object triples distilled from past \
1186             conversations.) Clients should set a 30s timeout on this \
1187             call; if exceeded, retry once or fall back to \
1188             `memory_recall`.",
1189            json_schema_object(serde_json::json!({
1190                "type": "object",
1191                "properties": {
1192                    "subject": {
1193                        "type": "string",
1194                        "description": "Subject id to query (e.g. 'Sam').",
1195                    },
1196                    "predicate": {
1197                        "type": "string",
1198                        "description": "Optional predicate filter (e.g. 'works_at').",
1199                    },
1200                    "since_ms": {
1201                        "type": "integer",
1202                        "description": "Optional valid_from_ms lower bound (epoch ms).",
1203                    },
1204                    "until_ms": {
1205                        "type": "integer",
1206                        "description": "Optional valid_to_ms upper bound (epoch ms). NULL upper bounds (still-valid facts) pass through.",
1207                    },
1208                    "include_as_object": {
1209                        "type": "boolean",
1210                        "description": "If true, also match facts where `subject` appears as the object (e.g. 'Sam pushes back on PRs about Maya' surfaces under subject='Maya'). Default false.",
1211                        "default": false,
1212                    },
1213                    "limit": {
1214                        "type": "integer",
1215                        "description": "Maximum results (default 5).",
1216                        "minimum": 1,
1217                        "maximum": 100,
1218                    },
1219                },
1220                "required": ["subject"],
1221            })),
1222        ),
1223        Tool::new(
1224            "memory_entities",
1225            "Discover entity ids from the structured-fact graph. Use \
1226             before memory_facts_about when you are not sure how a \
1227             person, project, or topic is keyed in memory, or when the \
1228             user gives a partial name. Returns entity ids with fact \
1229             counts and common predicates.",
1230            json_schema_object(serde_json::json!({
1231                "type": "object",
1232                "properties": {
1233                    "query": {
1234                        "type": "string",
1235                        "description": "Partial or exact entity id to search for.",
1236                        "minLength": 1,
1237                    },
1238                    "limit": {
1239                        "type": "integer",
1240                        "description": "Maximum results (default 5).",
1241                        "minimum": 1,
1242                        "maximum": 100,
1243                    },
1244                },
1245                "required": ["query"],
1246            })),
1247        ),
1248        Tool::new(
1249            "memory_contradictions",
1250            "Find places where the user's stated beliefs or facts \
1251             disagree across conversations — flag disagreements \
1252             before answering. Use whenever you're about to rely on \
1253             a remembered fact that could have changed (jobs, \
1254             relationships, preferences, opinions); a disagreement \
1255             here means the user has told you both X and not-X over \
1256             time and you should ask which is current instead of \
1257             guessing. Each result shows both conflicting statements \
1258             with the topic.",
1259            json_schema_object(serde_json::json!({
1260                "type": "object",
1261                "properties": {
1262                    "limit": {
1263                        "type": "integer",
1264                        "description": "Maximum results (default 5).",
1265                        "minimum": 1,
1266                        "maximum": 100,
1267                    },
1268                },
1269                "required": [],
1270            })),
1271        ),
1272        Tool::new(
1273            "memory_contradiction_resolve",
1274            "Mark one flagged contradiction as resolved, unresolved, \
1275             or reopened. Use after the user clarifies which side is \
1276             current. Pass the a_id, b_id, and kind from \
1277             memory_contradictions; status defaults to resolved.",
1278            json_schema_object(serde_json::json!({
1279                "type": "object",
1280                "properties": {
1281                    "a_id": {
1282                        "type": "string",
1283                        "description": "First contradiction id from memory_contradictions.",
1284                    },
1285                    "b_id": {
1286                        "type": "string",
1287                        "description": "Second contradiction id from memory_contradictions.",
1288                    },
1289                    "kind": {
1290                        "type": "string",
1291                        "description": "Contradiction kind from memory_contradictions.",
1292                    },
1293                    "status": {
1294                        "type": "string",
1295                        "enum": ["unresolved", "resolved", "reopened"],
1296                        "default": "resolved",
1297                        "description": "New lifecycle status.",
1298                    },
1299                    "resolution_note": {
1300                        "type": "string",
1301                        "description": "Optional human-readable clarification.",
1302                    },
1303                    "winning_triple_id": {
1304                        "type": "string",
1305                        "description": "Optional triple id to treat as the current/correct side.",
1306                    },
1307                },
1308                "required": ["a_id", "b_id", "kind"],
1309            })),
1310        ),
1311        Tool::new(
1312            "memory_inspect_cluster",
1313            "Show the raw conversations behind one summary. Returns \
1314             the one-line topic (the LLM-generated summary) and the \
1315             source conversations the topic was built from. Use \
1316             after memory_themes when the user asks \"show me the \
1317             raw context behind this\" or \"why does Solo think \
1318             that about cluster Y\". Source items are truncated to \
1319             200 chars unless `full_content` is set.",
1320            json_schema_object(serde_json::json!({
1321                "type": "object",
1322                "properties": {
1323                    "cluster_id": {
1324                        "type": "string",
1325                        "description": "Cluster id to inspect (from memory_themes hits).",
1326                    },
1327                    "full_content": {
1328                        "type": "boolean",
1329                        "description": "If true, episode content is returned verbatim. Default false (truncate to 200 chars + ellipsis).",
1330                    },
1331                },
1332                "required": ["cluster_id"],
1333            })),
1334        ),
1335        // Document tools (v0.7.0+). RAG over user-supplied files —
1336        // markdown notes, PDFs, runbooks, code, etc. Same vector space
1337        // as episodes; same embedder; same HNSW index.
1338        Tool::new(
1339            "memory_ingest_document",
1340            "Read a file from disk and add it to the user's document \
1341             library so it becomes searchable alongside past \
1342             conversations. Use when the user asks you to remember a \
1343             whole file (\"add my notes/runbook.md\", \"ingest this \
1344             PDF\"). The file is split into ~500-token chunks and \
1345             each chunk is embedded; chunks then surface through \
1346             memory_search_docs. Returns the new document id, chunk \
1347             count, and a `deduped` flag (true if the same content \
1348             was already ingested under another id).",
1349            json_schema_object(serde_json::json!({
1350                "type": "object",
1351                "properties": {
1352                    "path": {
1353                        "type": "string",
1354                        "description": "Server-side absolute path to the file to ingest. The file must be readable by the Solo process.",
1355                    },
1356                },
1357                "required": ["path"],
1358            })),
1359        ),
1360        Tool::new(
1361            "memory_search_docs",
1362            "Search across the user's ingested documents by topic or \
1363             phrase. Returns up to `limit` matching chunks, best \
1364             match first, each with the parent document's title + \
1365             source path so you can cite where the answer came from. \
1366             Use when the user asks a question that hinges on \
1367             material they've added as a file (\"what does my \
1368             runbook say about backups?\", \"find the section in the \
1369             notes about the new policy\"). Forgotten documents are \
1370             skipped.",
1371            json_schema_object(serde_json::json!({
1372                "type": "object",
1373                "properties": {
1374                    "query": {
1375                        "type": "string",
1376                        "description": "The query text.",
1377                    },
1378                    "limit": {
1379                        "type": "integer",
1380                        "description": "Maximum results (default 5).",
1381                        "minimum": 1,
1382                        "maximum": 100,
1383                    },
1384                },
1385                "required": ["query"],
1386            })),
1387        ),
1388        Tool::new(
1389            "memory_inspect_document",
1390            "Show one document's metadata plus a preview of every \
1391             chunk it was split into. Use after memory_search_docs \
1392             when the user wants the bigger picture for one hit \
1393             (\"show me the whole document this came from\"), or \
1394             after memory_list_documents to drill into one entry. \
1395             Each chunk preview is truncated to 200 chars.",
1396            json_schema_object(serde_json::json!({
1397                "type": "object",
1398                "properties": {
1399                    "doc_id": {
1400                        "type": "string",
1401                        "description": "Document id to inspect (UUID v7).",
1402                    },
1403                },
1404                "required": ["doc_id"],
1405            })),
1406        ),
1407        Tool::new(
1408            "memory_list_documents",
1409            "List the user's ingested documents, newest first. Use \
1410             when the user asks \"what documents have I added?\" or \
1411             \"show me my files\". Returns a paginated index — pass \
1412             `offset` to page further back. Forgotten documents are \
1413             hidden by default; set `include_forgotten=true` to see \
1414             them too.",
1415            json_schema_object(serde_json::json!({
1416                "type": "object",
1417                "properties": {
1418                    "limit": {
1419                        "type": "integer",
1420                        "description": "Maximum results per page (default 20).",
1421                        "minimum": 1,
1422                        "maximum": 100,
1423                    },
1424                    "offset": {
1425                        "type": "integer",
1426                        "description": "Number of rows to skip (for paging). Default 0.",
1427                        "minimum": 0,
1428                    },
1429                    "include_forgotten": {
1430                        "type": "boolean",
1431                        "description": "If true, also include documents the user has forgotten. Default false.",
1432                    },
1433                },
1434            })),
1435        ),
1436        Tool::new(
1437            "memory_forget_document",
1438            "Drop one document from the user's library by id. Use \
1439             when the user asks you to forget a specific file \
1440             (\"forget my old runbook\"). The document's chunks stop \
1441             appearing in memory_search_docs and the vectors are \
1442             tombstoned in the index. The chunk rows themselves are \
1443             kept for forensic value (a future restore command can \
1444             undo this).",
1445            json_schema_object(serde_json::json!({
1446                "type": "object",
1447                "properties": {
1448                    "doc_id": {
1449                        "type": "string",
1450                        "description": "Document id to forget (UUID v7).",
1451                    },
1452                },
1453                "required": ["doc_id"],
1454            })),
1455        ),
1456    ]
1457}
1458
1459fn json_schema_object(value: serde_json::Value) -> serde_json::Map<String, serde_json::Value> {
1460    match value {
1461        serde_json::Value::Object(map) => map,
1462        _ => panic!("json_schema_object: input must be an object"),
1463    }
1464}
1465
/// Returns the name of every tool this server exposes, in registration order.
///
/// Intended for cross-crate consumers (notably `solo doctor
/// --check-mcp-compat`) that only need the names and want to avoid the cost
/// of building full `rmcp::Tool` records (which allocate JSON schemas). The
/// ordering is meant to mirror `build_tools()`; drift between the two lists
/// is expected to be caught by the cross-provider regex test, which iterates
/// `build_tools()`.
pub fn tool_names() -> Vec<&'static str> {
    // Episode tools. `memory_remember_batch` is the v0.9.2 batched-remember
    // for agentic clients (solo-jarvis).
    const EPISODE_TOOLS: [&str; 7] = [
        "memory_remember",
        "memory_remember_batch",
        "memory_recall",
        "memory_context",
        "memory_forget",
        "memory_inspect",
        "memory_update",
    ];
    // Derived-layer tools.
    const DERIVED_TOOLS: [&str; 6] = [
        "memory_themes",
        "memory_facts_about",
        "memory_entities",
        "memory_contradictions",
        "memory_contradiction_resolve",
        "memory_inspect_cluster",
    ];
    // Document tools added in v0.7.0.
    const DOCUMENT_TOOLS: [&str; 5] = [
        "memory_ingest_document",
        "memory_search_docs",
        "memory_inspect_document",
        "memory_list_documents",
        "memory_forget_document",
    ];

    EPISODE_TOOLS
        .iter()
        .chain(DERIVED_TOOLS.iter())
        .chain(DOCUMENT_TOOLS.iter())
        .copied()
        .collect()
}
1498
1499// ---------------------------------------------------------------------------
1500// Tool handlers
1501// ---------------------------------------------------------------------------
1502
1503impl SoloMcpServer {
1504    async fn handle_remember(
1505        &self,
1506        args: RememberArgs,
1507    ) -> std::result::Result<CallToolResult, McpError> {
1508        let content = args.content.trim_end().to_string();
1509        if content.is_empty() {
1510            return Err(McpError::invalid_params(
1511                "memory_remember: content must not be empty".to_string(),
1512                None,
1513            ));
1514        }
1515        validate_salience(args.salience)?;
1516        let embedding: solo_core::Embedding = self
1517            .inner
1518            .tenant
1519            .embedder()
1520            .embed(&content)
1521            .await
1522            .map_err(solo_to_mcp)?;
1523        let episode = Episode {
1524            memory_id: MemoryId::new(),
1525            ts_ms: chrono::Utc::now().timestamp_millis(),
1526            source_type: args.source_type.unwrap_or_else(|| "user_message".into()),
1527            source_id: args.source_id,
1528            content,
1529            encoding_context: EncodingContext::default(),
1530            provenance: None,
1531            confidence: Confidence::new(0.9).expect("0.9 is in [0.0, 1.0]"),
1532            strength: 0.5,
1533            // v0.9.2: caller-supplied salience overrides the default. The
1534            // `validate_salience` call above has already rejected NaN /
1535            // out-of-range values.
1536            salience: args.salience.unwrap_or(0.5),
1537            tier: Tier::Hot,
1538        };
1539        let mid = self
1540            .inner
1541            .tenant
1542            .write()
1543            .remember_as(self.inner.audit_principal.clone(), episode, embedding)
1544            .await
1545            .map_err(solo_to_mcp)?;
1546        Ok(CallToolResult::success(vec![Content::text(format!(
1547            "remembered {mid}"
1548        ))]))
1549    }
1550
1551    /// v0.9.2 — handler for `memory_remember_batch`.
1552    ///
1553    /// Pipeline (mirrors `handle_remember` over N items):
1554    ///   1. Validate batch (non-empty, ≤ `MAX_REMEMBER_BATCH_SIZE`,
1555    ///      per-item content non-empty, per-item salience in [0.0, 1.0]).
1556    ///   2. Embed all items sequentially via the tenant's embedder.
1557    ///      We don't `join_all` here because the in-process embedder
1558    ///      paths today (stub, local-Anthropic, OpenAI) are individually
1559    ///      fast and serial is robust against rate-limit surprises (per
1560    ///      dev-log 0120 §8 R2 mitigation: existing embedder
1561    ///      throttling guards parallel fan-out; serial gives identical
1562    ///      semantics with simpler error paths). Parallel fan-out is a
1563    ///      v0.9.3 optimization once the batch tool has live traffic.
1564    ///   3. Build `Vec<(Episode, Embedding)>` with default Confidence /
1565    ///      strength / tier — same shape as single-Remember.
1566    ///   4. Dispatch via `WriteHandle::remember_batch_as`, which wraps
1567    ///      every INSERT in ONE `BEGIN IMMEDIATE` tx (ADR-0003 invariant
1568    ///      preserved).
1569    ///   5. Reply is `Vec<MemoryId>` in input order; serialise to JSON.
1570    async fn handle_remember_batch(
1571        &self,
1572        args: RememberBatchArgs,
1573        progress: Option<crate::mcp_progress::ProgressReporter>,
1574    ) -> std::result::Result<CallToolResult, McpError> {
1575        // 1. Batch-shape validation. The writer-actor will re-check
1576        //    `MAX_REMEMBER_BATCH_SIZE` (dev-log 0120 §3 Decision F) and
1577        //    reject with `InvalidInput` — we mirror the check here to
1578        //    avoid the round-trip into the writer + the embedder calls
1579        //    when the request is obviously over-cap.
1580        if args.items.is_empty() {
1581            return Err(McpError::invalid_params(
1582                "memory_remember_batch: items must not be empty".to_string(),
1583                None,
1584            ));
1585        }
1586        if args.items.len() > solo_storage::MAX_REMEMBER_BATCH_SIZE {
1587            return Err(McpError::invalid_params(
1588                format!(
1589                    "memory_remember_batch: {} items exceeds MAX_REMEMBER_BATCH_SIZE = {}",
1590                    args.items.len(),
1591                    solo_storage::MAX_REMEMBER_BATCH_SIZE,
1592                ),
1593                None,
1594            ));
1595        }
1596        for (i, item) in args.items.iter().enumerate() {
1597            if item.content.trim_end().is_empty() {
1598                return Err(McpError::invalid_params(
1599                    format!("memory_remember_batch: items[{i}].content must not be empty"),
1600                    None,
1601                ));
1602            }
1603            validate_salience(item.salience).map_err(|e| {
1604                // Re-wrap with the index so the caller can pinpoint
1605                // which item tripped the validator.
1606                McpError::invalid_params(
1607                    format!("memory_remember_batch: items[{i}].{}", e.message),
1608                    None,
1609                )
1610            })?;
1611        }
1612
1613        // v0.11.0 P3: progress emission is gated on batch size — below
1614        // the threshold (50 items) the wire-overhead of progress
1615        // notifications outweighs the UX benefit. Above threshold +
1616        // client opted in (`reporter.is_some()`), emit one event per
1617        // `MCP_REMEMBER_BATCH_PROGRESS_EMIT_EVERY` items during the
1618        // embed loop + one terminal "embedded" + one "inserted" event.
1619        let total = args.items.len() as u64;
1620        let progress_active = progress.is_some()
1621            && args.items.len() > crate::mcp_progress::MCP_REMEMBER_BATCH_PROGRESS_ITEM_THRESHOLD;
1622        let progress_reporter = if progress_active {
1623            progress.as_ref()
1624        } else {
1625            None
1626        };
1627
1628        // 2. Embed each item. Serial fan-out (see doc comment above).
1629        let embedder = self.inner.tenant.embedder();
1630        let now_ms = chrono::Utc::now().timestamp_millis();
1631        let mut pairs: Vec<(Episode, solo_core::Embedding)> = Vec::with_capacity(args.items.len());
1632        for (i, item) in args.items.into_iter().enumerate() {
1633            let content = item.content.trim_end().to_string();
1634            let embedding = embedder.embed(&content).await.map_err(solo_to_mcp)?;
1635            let episode = Episode {
1636                memory_id: MemoryId::new(),
1637                ts_ms: now_ms,
1638                source_type: item.source_type.unwrap_or_else(|| "user_message".into()),
1639                source_id: item.source_id,
1640                content,
1641                encoding_context: EncodingContext::default(),
1642                provenance: None,
1643                confidence: Confidence::new(0.9).expect("0.9 is in [0.0, 1.0]"),
1644                strength: 0.5,
1645                salience: item.salience.unwrap_or(0.5),
1646                tier: Tier::Hot,
1647            };
1648            pairs.push((episode, embedding));
1649            // v0.11.0 P3 checkpoint A — embed progress, every N items.
1650            // `(i + 1) % EMIT_EVERY == 0` emits at items 25, 50, 75, ...
1651            // The terminal "embedded" event below covers any remainder.
1652            let done = (i + 1) as u64;
1653            if (i + 1) % crate::mcp_progress::MCP_REMEMBER_BATCH_PROGRESS_EMIT_EVERY == 0 {
1654                crate::mcp_progress::report_if_some(
1655                    progress_reporter,
1656                    done,
1657                    Some(total),
1658                    Some("embedding"),
1659                );
1660            }
1661        }
1662
1663        // v0.11.0 P3 checkpoint B — all items embedded; about to land
1664        // in writer-actor. Always-emitted (when progress_active) so a
1665        // batch that wasn't a multiple of EMIT_EVERY still gets a
1666        // final embed-phase event.
1667        crate::mcp_progress::report_if_some(
1668            progress_reporter,
1669            total,
1670            Some(total),
1671            Some("embedded"),
1672        );
1673
1674        // 3. Dispatch into the writer-actor. The batch lands as one tx.
1675        let memory_ids = self
1676            .inner
1677            .tenant
1678            .write()
1679            .remember_batch_as(self.inner.audit_principal.clone(), pairs)
1680            .await
1681            .map_err(solo_to_mcp)?;
1682
1683        // v0.11.0 P3 checkpoint C — writer-actor committed. The reply
1684        // body below also lands in the POST response, but this event
1685        // gives a client subscribed to the GET stream early confirmation
1686        // that the row is committed without waiting for the POST to
1687        // return (network buffering can stall the POST response
1688        // marginally; the SSE event is immediate).
1689        crate::mcp_progress::report_if_some(
1690            progress_reporter,
1691            total,
1692            Some(total),
1693            Some("inserted"),
1694        );
1695
1696        // 4. Reply: JSON-serialised array of memory ids in input order.
1697        //    Stringified so MCP clients see UUID strings (matches single
1698        //    `memory_remember`'s reply shape — both speak strings on
1699        //    the wire).
1700        let ids_as_strings: Vec<String> = memory_ids.iter().map(|m| m.to_string()).collect();
1701        let body = serde_json::to_string(&ids_as_strings)
1702            .map_err(|e| McpError::internal_error(format!("serialize batch reply: {e}"), None))?;
1703        Ok(CallToolResult::success(vec![Content::text(body)]))
1704    }
1705
1706    async fn handle_recall(
1707        &self,
1708        args: RecallArgs,
1709    ) -> std::result::Result<CallToolResult, McpError> {
1710        // Pipeline lives in solo-query; the transport just formats the
1711        // result. solo_query::run_recall validates empty queries
1712        // (returns InvalidInput → invalid_params via solo_to_mcp).
1713        let result = solo_query::run_recall(
1714            self.inner.tenant.as_ref(),
1715            self.inner.audit_principal.clone(),
1716            &args.query,
1717            args.limit,
1718        )
1719        .await
1720        .map_err(solo_to_mcp)?;
1721
1722        // Always return a JSON array of hits (possibly empty) so clients
1723        // can `JSON.parse` uniformly. The previous shape returned a
1724        // plain-English string ("no matches (index has N vectors)") on
1725        // empty results, which broke any client parsing recall as JSON.
1726        // The `index_len` diagnostic is preserved as an MCP `Content` text
1727        // alongside the JSON payload — agents see both; tooling parses the
1728        // first content as JSON.
1729        let body = serde_json::to_string_pretty(&result.hits).unwrap_or_else(|_| "[]".to_string());
1730        let mut contents = vec![Content::text(body)];
1731        if result.hits.is_empty() {
1732            contents.push(Content::text(format!(
1733                "(index has {} vectors)",
1734                result.index_len
1735            )));
1736        }
1737        Ok(CallToolResult::success(contents))
1738    }
1739
1740    async fn handle_memory_context(
1741        &self,
1742        args: MemoryContextArgs,
1743    ) -> std::result::Result<CallToolResult, McpError> {
1744        let result = solo_query::memory_context(
1745            self.inner.tenant.as_ref(),
1746            self.inner.audit_principal.clone(),
1747            &args.query,
1748            args.subject.as_deref(),
1749            &self.inner.user_aliases,
1750            args.window_days,
1751            args.limit,
1752        )
1753        .await
1754        .map_err(solo_to_mcp)?;
1755        let body = serde_json::to_string_pretty(&result).unwrap_or_else(|_| String::new());
1756        Ok(CallToolResult::success(vec![Content::text(body)]))
1757    }
1758
1759    async fn handle_forget(
1760        &self,
1761        args: ForgetArgs,
1762    ) -> std::result::Result<CallToolResult, McpError> {
1763        let mid = MemoryId::from_str(&args.memory_id)
1764            .map_err(|e| McpError::invalid_params(format!("invalid memory_id: {e}"), None))?;
1765        self.inner
1766            .tenant
1767            .write()
1768            .forget_as(self.inner.audit_principal.clone(), mid, args.reason)
1769            .await
1770            .map_err(solo_to_mcp)?;
1771        Ok(CallToolResult::success(vec![Content::text(format!(
1772            "forgotten {mid}"
1773        ))]))
1774    }
1775
1776    async fn handle_inspect(
1777        &self,
1778        args: InspectArgs,
1779    ) -> std::result::Result<CallToolResult, McpError> {
1780        let mid = MemoryId::from_str(&args.memory_id)
1781            .map_err(|e| McpError::invalid_params(format!("invalid memory_id: {e}"), None))?;
1782        // Pipeline lives in solo-query::inspect; transports just format.
1783        let row = solo_query::inspect_one(
1784            self.inner.tenant.read(),
1785            self.inner.tenant.audit(),
1786            self.inner.audit_principal.clone(),
1787            mid,
1788        )
1789        .await
1790        .map_err(solo_to_mcp)?;
1791        let body = serde_json::to_string_pretty(&row).unwrap_or_else(|_| String::new());
1792        Ok(CallToolResult::success(vec![Content::text(body)]))
1793    }
1794
1795    async fn handle_update(
1796        &self,
1797        args: UpdateArgs,
1798    ) -> std::result::Result<CallToolResult, McpError> {
1799        let mid = MemoryId::from_str(&args.memory_id)
1800            .map_err(|e| McpError::invalid_params(format!("invalid memory_id: {e}"), None))?;
1801        if args.content.trim().is_empty() {
1802            return Err(McpError::invalid_params(
1803                "memory_update: content must not be empty".to_string(),
1804                None,
1805            ));
1806        }
1807        let result = solo_query::memory_update(
1808            self.inner.tenant.as_ref(),
1809            self.inner.audit_principal.clone(),
1810            mid,
1811            &args.content,
1812        )
1813        .await
1814        .map_err(solo_to_mcp)?;
1815        let body = serde_json::to_string_pretty(&result).unwrap_or_else(|_| String::new());
1816        Ok(CallToolResult::success(vec![Content::text(body)]))
1817    }
1818
1819    // Path 1 derived-layer handlers (v0.4.0+). Each one delegates to a
1820    // single solo-query::derived pipeline and serialises the result Vec
1821    // to pretty JSON for the MCP wire. Empty result → JSON empty array
1822    // `[]` (not a special-case "no matches" string) so MCP clients can
1823    // parse uniformly.
1824
1825    async fn handle_themes(
1826        &self,
1827        args: ThemesArgs,
1828    ) -> std::result::Result<CallToolResult, McpError> {
1829        let hits = solo_query::themes(
1830            self.inner.tenant.read(),
1831            self.inner.tenant.audit(),
1832            self.inner.audit_principal.clone(),
1833            args.window_days,
1834            args.limit,
1835        )
1836        .await
1837        .map_err(solo_to_mcp)?;
1838        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1839        Ok(CallToolResult::success(vec![Content::text(body)]))
1840    }
1841
1842    async fn handle_facts_about(
1843        &self,
1844        args: FactsAboutArgs,
1845    ) -> std::result::Result<CallToolResult, McpError> {
1846        if args.subject.trim().is_empty() {
1847            return Err(McpError::invalid_params(
1848                "memory_facts_about: subject must not be empty".to_string(),
1849                None,
1850            ));
1851        }
1852        let hits = solo_query::facts_about(
1853            self.inner.tenant.read(),
1854            self.inner.tenant.audit(),
1855            self.inner.audit_principal.clone(),
1856            &args.subject,
1857            &self.inner.user_aliases,
1858            args.include_as_object,
1859            args.predicate.as_deref(),
1860            args.since_ms,
1861            args.until_ms,
1862            args.limit,
1863        )
1864        .await
1865        .map_err(solo_to_mcp)?;
1866        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1867        Ok(CallToolResult::success(vec![Content::text(body)]))
1868    }
1869
1870    async fn handle_entities(
1871        &self,
1872        args: EntitiesArgs,
1873    ) -> std::result::Result<CallToolResult, McpError> {
1874        if args.query.trim().is_empty() {
1875            return Err(McpError::invalid_params(
1876                "memory_entities: query must not be empty".to_string(),
1877                None,
1878            ));
1879        }
1880        let hits = solo_query::entities(
1881            self.inner.tenant.read(),
1882            self.inner.tenant.audit(),
1883            self.inner.audit_principal.clone(),
1884            &args.query,
1885            args.limit,
1886        )
1887        .await
1888        .map_err(solo_to_mcp)?;
1889        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1890        Ok(CallToolResult::success(vec![Content::text(body)]))
1891    }
1892
1893    async fn handle_contradictions(
1894        &self,
1895        args: ContradictionsArgs,
1896    ) -> std::result::Result<CallToolResult, McpError> {
1897        let hits = solo_query::contradictions(
1898            self.inner.tenant.read(),
1899            self.inner.tenant.audit(),
1900            self.inner.audit_principal.clone(),
1901            args.limit,
1902        )
1903        .await
1904        .map_err(solo_to_mcp)?;
1905        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
1906        Ok(CallToolResult::success(vec![Content::text(body)]))
1907    }
1908
1909    async fn handle_contradiction_resolve(
1910        &self,
1911        args: ContradictionResolveArgs,
1912    ) -> std::result::Result<CallToolResult, McpError> {
1913        if args.a_id.trim().is_empty() || args.b_id.trim().is_empty() || args.kind.trim().is_empty()
1914        {
1915            return Err(McpError::invalid_params(
1916                "memory_contradiction_resolve: a_id, b_id, and kind must not be empty".to_string(),
1917                None,
1918            ));
1919        }
1920        // Dev-log 0152 H1: routed through the writer actor so the
1921        // UPDATE + audit row are atomic. The signature still takes
1922        // reader-pool + audit for now (deprecated; ignored by the
1923        // function body).
1924        let result = solo_query::resolve_contradiction(
1925            self.inner.tenant.write(),
1926            self.inner.tenant.read(),
1927            self.inner.tenant.audit(),
1928            self.inner.audit_principal.clone(),
1929            &args.a_id,
1930            &args.b_id,
1931            &args.kind,
1932            &args.status,
1933            args.resolution_note.as_deref(),
1934            args.winning_triple_id.as_deref(),
1935        )
1936        .await
1937        .map_err(solo_to_mcp)?;
1938        let body = serde_json::to_string_pretty(&result).unwrap_or_else(|_| String::new());
1939        Ok(CallToolResult::success(vec![Content::text(body)]))
1940    }
1941
1942    async fn handle_inspect_cluster(
1943        &self,
1944        args: InspectClusterArgs,
1945    ) -> std::result::Result<CallToolResult, McpError> {
1946        if args.cluster_id.trim().is_empty() {
1947            return Err(McpError::invalid_params(
1948                "memory_inspect_cluster: cluster_id must not be empty".to_string(),
1949                None,
1950            ));
1951        }
1952        // `solo_to_mcp` maps `Error::NotFound` → `invalid_params` for
1953        // MCP (the protocol does not have a separate "not found" error
1954        // shape; clients see the message verbatim, which includes the
1955        // cluster_id).
1956        let record = solo_query::inspect_cluster(
1957            self.inner.tenant.read(),
1958            self.inner.tenant.audit(),
1959            self.inner.audit_principal.clone(),
1960            &args.cluster_id,
1961            args.full_content,
1962        )
1963        .await
1964        .map_err(solo_to_mcp)?;
1965        let body = serde_json::to_string_pretty(&record).unwrap_or_else(|_| String::new());
1966        Ok(CallToolResult::success(vec![Content::text(body)]))
1967    }
1968
1969    // Document handlers (v0.7.0+). Each wraps the corresponding writer
1970    // / query API; the MCP wire shape is plain JSON serialisation of
1971    // the returned report / records.
1972
1973    async fn handle_ingest_document(
1974        &self,
1975        args: IngestDocumentArgs,
1976        progress: Option<crate::mcp_progress::ProgressReporter>,
1977    ) -> std::result::Result<CallToolResult, McpError> {
1978        if args.path.trim().is_empty() {
1979            return Err(McpError::invalid_params(
1980                "memory_ingest_document: path must not be empty".to_string(),
1981                None,
1982            ));
1983        }
1984        let path = std::path::PathBuf::from(args.path);
1985        // Defaults match what the daemon uses today (target 500 tokens,
1986        // 50-token overlap). Future: thread a per-call override through
1987        // the args struct if a use case appears.
1988        let chunk_config = solo_storage::document::ChunkConfig::default();
1989
1990        // v0.11.0 P3: ingest checkpoints. The writer-actor's
1991        // `ingest_document_as` is one opaque command that internally
1992        // performs parse → chunk → embed → SQL insert; we bookend it
1993        // with phase-marker progress events. The 4-phase taxonomy
1994        // matches the MCP spec brief — `total=4`, `progress` walks 1
1995        // → 4 — even though phases 1 and 2 (parse, chunk) emit before
1996        // the writer call and 3 and 4 (embed, insert) emit after.
1997        // Real chunk-by-chunk progress would require redesigning the
1998        // writer command shape (cross-cuts ADR-0003); P3's bookend
1999        // pattern stays additive without touching the writer.
2000        const INGEST_TOTAL_PHASES: u64 = 4;
2001        crate::mcp_progress::report_if_some(
2002            progress.as_ref(),
2003            1,
2004            Some(INGEST_TOTAL_PHASES),
2005            Some("parsed"),
2006        );
2007        crate::mcp_progress::report_if_some(
2008            progress.as_ref(),
2009            2,
2010            Some(INGEST_TOTAL_PHASES),
2011            Some("chunked"),
2012        );
2013
2014        let report = self
2015            .inner
2016            .tenant
2017            .write()
2018            .ingest_document_as(self.inner.audit_principal.clone(), path, chunk_config)
2019            .await
2020            .map_err(solo_to_mcp)?;
2021
2022        crate::mcp_progress::report_if_some(
2023            progress.as_ref(),
2024            3,
2025            Some(INGEST_TOTAL_PHASES),
2026            Some("embedded"),
2027        );
2028        // Final event includes the real chunk count from the report;
2029        // the per-event `message` field carries it so clients can
2030        // surface "N chunks indexed" without parsing the POST reply
2031        // body.
2032        crate::mcp_progress::report_if_some(
2033            progress.as_ref(),
2034            INGEST_TOTAL_PHASES,
2035            Some(INGEST_TOTAL_PHASES),
2036            Some(&format!("inserted {} chunks", report.chunks_persisted)),
2037        );
2038
2039        let body = serde_json::to_string_pretty(&report).unwrap_or_else(|_| String::new());
2040        Ok(CallToolResult::success(vec![Content::text(body)]))
2041    }
2042
2043    async fn handle_search_docs(
2044        &self,
2045        args: SearchDocsArgs,
2046        progress: Option<crate::mcp_progress::ProgressReporter>,
2047    ) -> std::result::Result<CallToolResult, McpError> {
2048        // v0.11.0 P3: progress emission for search is gated on `top_k`
2049        // (passed via `args.limit`) — below 100 the search completes
2050        // fast enough that progress notifications add wire-overhead
2051        // with no UX benefit (Decision C). Above threshold + client
2052        // opted in, emit 3 phase-marker events around the query call.
2053        let top_k = args.limit as u32;
2054        let progress_active = progress.is_some()
2055            && top_k > crate::mcp_progress::MCP_SEARCH_DOCS_PROGRESS_TOP_K_THRESHOLD;
2056        let progress_reporter = if progress_active {
2057            progress.as_ref()
2058        } else {
2059            None
2060        };
2061        const SEARCH_TOTAL_PHASES: u64 = 3;
2062        crate::mcp_progress::report_if_some(
2063            progress_reporter,
2064            1,
2065            Some(SEARCH_TOTAL_PHASES),
2066            Some("hnsw_lookup"),
2067        );
2068
2069        // `solo_query::run_doc_search` validates empty queries (returns
2070        // InvalidInput → invalid_params via solo_to_mcp) and clamps
2071        // limit upstream of the embedder call.
2072        let hits = solo_query::run_doc_search(
2073            self.inner.tenant.as_ref(),
2074            self.inner.audit_principal.clone(),
2075            &args.query,
2076            args.limit,
2077        )
2078        .await
2079        .map_err(solo_to_mcp)?;
2080
2081        crate::mcp_progress::report_if_some(
2082            progress_reporter,
2083            2,
2084            Some(SEARCH_TOTAL_PHASES),
2085            Some("reranked"),
2086        );
2087        crate::mcp_progress::report_if_some(
2088            progress_reporter,
2089            SEARCH_TOTAL_PHASES,
2090            Some(SEARCH_TOTAL_PHASES),
2091            Some(&format!("returning {} hits", hits.len())),
2092        );
2093
2094        let body = serde_json::to_string_pretty(&hits).unwrap_or_else(|_| String::new());
2095        Ok(CallToolResult::success(vec![Content::text(body)]))
2096    }
2097
2098    async fn handle_inspect_document(
2099        &self,
2100        args: InspectDocumentArgs,
2101    ) -> std::result::Result<CallToolResult, McpError> {
2102        let doc_id = DocumentId::from_str(&args.doc_id)
2103            .map_err(|e| McpError::invalid_params(format!("invalid doc_id: {e}"), None))?;
2104        let result_opt = solo_query::inspect_document(
2105            self.inner.tenant.read(),
2106            self.inner.tenant.audit(),
2107            self.inner.audit_principal.clone(),
2108            &doc_id,
2109        )
2110        .await
2111        .map_err(solo_to_mcp)?;
2112        match result_opt {
2113            Some(record) => {
2114                let body = serde_json::to_string_pretty(&record).unwrap_or_else(|_| String::new());
2115                Ok(CallToolResult::success(vec![Content::text(body)]))
2116            }
2117            None => Err(McpError::invalid_params(
2118                format!("document {doc_id} not found"),
2119                None,
2120            )),
2121        }
2122    }
2123
2124    async fn handle_list_documents(
2125        &self,
2126        args: ListDocumentsArgs,
2127    ) -> std::result::Result<CallToolResult, McpError> {
2128        let rows = solo_query::list_documents(
2129            self.inner.tenant.read(),
2130            self.inner.tenant.audit(),
2131            self.inner.audit_principal.clone(),
2132            args.limit,
2133            args.offset,
2134            args.include_forgotten,
2135        )
2136        .await
2137        .map_err(solo_to_mcp)?;
2138        let body = serde_json::to_string_pretty(&rows).unwrap_or_else(|_| String::new());
2139        Ok(CallToolResult::success(vec![Content::text(body)]))
2140    }
2141
2142    async fn handle_forget_document(
2143        &self,
2144        args: ForgetDocumentArgs,
2145    ) -> std::result::Result<CallToolResult, McpError> {
2146        let doc_id = DocumentId::from_str(&args.doc_id)
2147            .map_err(|e| McpError::invalid_params(format!("invalid doc_id: {e}"), None))?;
2148        let report = self
2149            .inner
2150            .tenant
2151            .write()
2152            .forget_document_as(self.inner.audit_principal.clone(), doc_id)
2153            .await
2154            .map_err(solo_to_mcp)?;
2155        let body = serde_json::to_string_pretty(&report).unwrap_or_else(|_| String::new());
2156        Ok(CallToolResult::success(vec![Content::text(body)]))
2157    }
2158}
2159
2160#[cfg(test)]
2161mod dispatch_tests {
2162    //! In-process integration tests for the MCP tool surface. We invoke
2163    //! `SoloMcpServer::dispatch_tool` directly (bypasses the rmcp
2164    //! protocol framing + `RequestContext`, which requires a `Peer`
2165    //! that's not constructible outside rmcp internals). The server is
2166    //! constructed against a real WriterActor + ReaderPool +
2167    //! StubEmbedder + StubVectorIndex from `solo_storage::test_support`.
2168    //!
2169    //! Tests live inline in this module rather than `tests/` because an
2170    //! external integration-test exe in `target/debug/deps/mcp_dispatch-*`
2171    //! tripped Windows UAC ERROR_ELEVATION_REQUIRED on the dev machine.
2172    //! The lib test binary doesn't have that issue.
2173    use super::*;
2174    use serde_json::json;
2175    use solo_core::VectorIndex;
2176    use solo_storage::test_support::StubVectorIndex;
2177    use solo_storage::{
2178        EmbedderConfig, IdentityConfig, KeyMaterial, ReaderPool, SoloConfig, StubEmbedder,
2179        TenantHandle, TenantRegistry, WriterActor, WriterSpawn,
2180    };
2181    use std::sync::Arc as StdArc;
2182
2183    fn fake_config(dim: u32) -> SoloConfig {
2184        SoloConfig {
2185            schema_version: 1,
2186            salt_hex: "00000000000000000000000000000000".to_string(),
2187            embedder: EmbedderConfig {
2188                name: "stub".to_string(),
2189                version: "v1".to_string(),
2190                dim,
2191                dtype: "f32".to_string(),
2192            },
2193            identity: IdentityConfig::default(),
2194            documents: solo_storage::DocumentConfig::default(),
2195            auth: None,
2196            audit: solo_storage::AuditSettings::default(),
2197            redaction: solo_storage::RedactionConfig::default(),
2198            llm: None,
2199            triples: solo_storage::TriplesConfig::default(),
2200            sampling: solo_storage::SamplingConfig::default(),
2201            steward: solo_storage::StewardSettings::default(),
2202        }
2203    }
2204
    /// Test fixture bundling the MCP server under test with the resources
    /// that have to stay alive for as long as the server is in use.
    struct Harness {
        /// Server whose `dispatch_tool` / `dispatch_list_tools` the tests call.
        server: SoloMcpServer,
        /// Temporary directory that contains `test.db`; held so the
        /// directory is not removed while the test is running.
        _tmp: tempfile::TempDir,
        /// Path of the database file inside `_tmp`, reopened by `open_db`.
        db_path: std::path::PathBuf,
        /// Writer handle held in addition to the one given to the tenant;
        /// taken and dropped in `shutdown`.
        write_handle_extra: Option<solo_storage::WriteHandle>,
        /// Join handle of the writer thread; taken and joined in `shutdown`.
        join: Option<std::thread::JoinHandle<()>>,
    }
2212
2213    impl Harness {
2214        fn new(runtime: &tokio::runtime::Runtime) -> Self {
2215            use solo_storage::embedder_registry::{EmbedderIdentity, get_or_insert_embedder_id};
2216
2217            let tmp = tempfile::TempDir::new().unwrap();
2218            let dim = 16usize;
2219            let hnsw: StdArc<dyn VectorIndex + Send + Sync> =
2220                StdArc::new(StubVectorIndex::new(dim));
2221            let embedder: StdArc<dyn solo_core::Embedder> =
2222                StdArc::new(StubEmbedder::new("stub", "v1", dim));
2223
2224            let conn = solo_storage::test_support::open_test_db_at(&tmp.path().join("test.db"));
2225            let embedder_id = get_or_insert_embedder_id(
2226                &conn,
2227                &EmbedderIdentity {
2228                    name: "stub".into(),
2229                    version: "v1".into(),
2230                    dim: dim as u32,
2231                    dtype: "f32".into(),
2232                },
2233            )
2234            .expect("register stub embedder");
2235            let WriterSpawn { handle, join } =
2236                WriterActor::spawn_full(conn, hnsw.clone(), tmp.path().to_path_buf(), embedder_id);
2237
2238            // ReaderPool's deadpool::Pool needs a live tokio runtime for
2239            // both build + drop; build inside block_on.
2240            let path = tmp.path().join("test.db");
2241            let pool: ReaderPool =
2242                runtime.block_on(async { ReaderPool::new(&path, None, hnsw.clone()).unwrap() });
2243
2244            let tenant_id = solo_core::TenantId::default_tenant();
2245            let tenant_handle = StdArc::new(TenantHandle::from_parts_for_tests(
2246                tenant_id.clone(),
2247                fake_config(dim as u32),
2248                path.clone(),
2249                tmp.path().to_path_buf(),
2250                embedder_id,
2251                hnsw,
2252                embedder.clone(),
2253                handle.clone(),
2254                std::thread::spawn(|| {}),
2255                pool,
2256            ));
2257            let key = KeyMaterial::from_bytes_for_tests([0u8; 32]);
2258            let registry = StdArc::new(TenantRegistry::for_tests_with_single_tenant(
2259                tmp.path().to_path_buf(),
2260                key,
2261                embedder,
2262                tenant_handle.clone(),
2263            ));
2264            let server = SoloMcpServer::new_for_tenant(registry, tenant_handle, Vec::new());
2265            Harness {
2266                server,
2267                _tmp: tmp,
2268                db_path: path,
2269                write_handle_extra: Some(handle),
2270                join: Some(join),
2271            }
2272        }
2273
2274        fn open_db(&self) -> rusqlite::Connection {
2275            solo_storage::test_support::open_test_db_at(&self.db_path)
2276        }
2277
2278        fn shutdown(mut self, runtime: &tokio::runtime::Runtime) {
2279            // The whole shutdown runs inside block_on so deadpool-sqlite's
2280            // drop (which schedules cleanup on the active runtime) sees a
2281            // live reactor. Without this, dropping the SoloMcpServer
2282            // (which holds the ReaderPool through its Arc<Inner>) panics
2283            // with "no reactor running".
2284            let join = self.join.take();
2285            let extra = self.write_handle_extra.take();
2286            runtime.block_on(async move {
2287                drop(extra);
2288                drop(self.server);
2289                drop(self._tmp);
2290                if let Some(join) = join {
2291                    let (tx, rx) = std::sync::mpsc::channel();
2292                    std::thread::spawn(move || {
2293                        let _ = tx.send(join.join());
2294                    });
2295                    tokio::task::spawn_blocking(move || {
2296                        rx.recv_timeout(std::time::Duration::from_secs(5))
2297                    })
2298                    .await
2299                    .expect("blocking task")
2300                    .expect("writer thread did not exit within 5s")
2301                    .expect("writer thread panicked");
2302                }
2303            });
2304        }
2305    }
2306
2307    fn rt() -> tokio::runtime::Runtime {
2308        tokio::runtime::Builder::new_multi_thread()
2309            .worker_threads(2)
2310            .enable_all()
2311            .build()
2312            .unwrap()
2313    }
2314
2315    /// Pull the first Content::text body out of a CallToolResult. Use
2316    /// serde_json roundtrip as a robust extractor — `Content`'s public
2317    /// API doesn't directly expose the inner text without going through
2318    /// pattern-matching on RawContent.
2319    fn first_text(r: &rmcp::model::CallToolResult) -> String {
2320        let first = r.content.first().expect("at least one content item");
2321        let v = serde_json::to_value(first).expect("content serialises");
2322        v.get("text")
2323            .and_then(|t| t.as_str())
2324            .map(|s| s.to_string())
2325            .unwrap_or_else(|| format!("{v}"))
2326    }
2327
2328    fn seed_episode(conn: &rusqlite::Connection, content: &str) -> (MemoryId, i64) {
2329        let memory_id = MemoryId::new();
2330        conn.execute(
2331            "INSERT INTO episodes
2332                (memory_id, ts_ms, source_type, content, confidence, strength,
2333                 salience, tier, status, created_at_ms, updated_at_ms)
2334             VALUES (?1, 0, 'test', ?2, 0.9, 0.5, 0.5, 'hot', 'active', 0, 0)",
2335            rusqlite::params![memory_id.to_string(), content],
2336        )
2337        .expect("seed episode");
2338        (memory_id, conn.last_insert_rowid())
2339    }
2340
2341    fn seed_triple_row(
2342        conn: &rusqlite::Connection,
2343        triple_id: &str,
2344        subject: &str,
2345        predicate: &str,
2346        object: &str,
2347        source_episode_rowid: Option<i64>,
2348    ) {
2349        conn.execute(
2350            "INSERT INTO triples
2351                 (triple_id, subject_id, predicate, object_id, object_kind,
2352                  valid_from_ms, valid_to_ms, confidence, provenance_json,
2353                  status, created_at_ms, updated_at_ms, source_episode_id)
2354                 VALUES (?1, ?2, ?3, ?4, 'literal', 0, NULL, 0.9, '{}',
2355                         'active', 0, 0, ?5)",
2356            rusqlite::params![triple_id, subject, predicate, object, source_episode_rowid],
2357        )
2358        .expect("seed triple");
2359    }
2360
2361    fn seed_contradiction_row(conn: &rusqlite::Connection, a_id: &str, b_id: &str, kind: &str) {
2362        conn.execute(
2363            "INSERT INTO contradictions
2364                 (a_memory_id, b_memory_id, kind, explanation, detected_at_ms,
2365                  status, resolved_at_ms, resolution_note, winning_triple_id)
2366                 VALUES (?1, ?2, ?3, 'test contradiction', 0,
2367                         'unresolved', NULL, NULL, NULL)",
2368            rusqlite::params![a_id, b_id, kind],
2369        )
2370        .expect("seed contradiction");
2371    }
2372
2373    #[test]
2374    fn tools_list_returns_eighteen_canonical_tools() {
2375        let runtime = rt();
2376        let h = Harness::new(&runtime);
2377        let tools = h.server.dispatch_list_tools();
2378        let names: Vec<&str> = tools.iter().map(|t| t.name.as_ref()).collect();
2379        assert_eq!(
2380            names,
2381            vec![
2382                "memory_remember",
2383                // v0.9.2 — batched-remember for agentic clients.
2384                "memory_remember_batch",
2385                "memory_recall",
2386                "memory_context",
2387                "memory_forget",
2388                "memory_inspect",
2389                "memory_update",
2390                // Derived-layer tools added in v0.4.0:
2391                "memory_themes",
2392                "memory_facts_about",
2393                "memory_entities",
2394                "memory_contradictions",
2395                "memory_contradiction_resolve",
2396                // Added in v0.5.0 (Priority 3):
2397                "memory_inspect_cluster",
2398                // Document tools added in v0.7.0:
2399                "memory_ingest_document",
2400                "memory_search_docs",
2401                "memory_inspect_document",
2402                "memory_list_documents",
2403                "memory_forget_document",
2404            ]
2405        );
2406        for t in &tools {
2407            // rmcp 1.x: Tool.description is Option<Cow<'static, str>>.
2408            let desc = t.description.as_deref().unwrap_or("");
2409            assert!(!desc.is_empty(), "{} description empty", t.name);
2410            let _schema = t.schema_as_json_value();
2411            // `required` is intentionally absent on memory_themes +
2412            // memory_contradictions + memory_list_documents (all args
2413            // optional with defaults). memory_facts_about has required
2414            // = ["subject"], etc. We don't assert per-tool 'required'
2415            // shape here; the schema's `properties` field is the more
2416            // important signal and is always present.
2417        }
2418        h.shutdown(&runtime);
2419    }
2420
2421    #[test]
2422    fn themes_returns_json_array_on_empty_db() {
2423        let runtime = rt();
2424        let h = Harness::new(&runtime);
2425        runtime.block_on(async {
2426            let r = h
2427                .server
2428                .dispatch_tool("memory_themes", json!({}), None)
2429                .await
2430                .expect("themes succeeds");
2431            let text = first_text(&r);
2432            // Empty derived layer → empty array JSON. Parses cleanly.
2433            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
2434            assert!(v.is_array(), "expected array, got: {text}");
2435            assert_eq!(v.as_array().unwrap().len(), 0);
2436        });
2437        h.shutdown(&runtime);
2438    }
2439
2440    #[test]
2441    fn themes_passes_through_window_and_limit_args() {
2442        let runtime = rt();
2443        let h = Harness::new(&runtime);
2444        runtime.block_on(async {
2445            // Should not crash with optional + integer args present.
2446            let r = h
2447                .server
2448                .dispatch_tool(
2449                    "memory_themes",
2450                    json!({ "window_days": 7, "limit": 20 }),
2451                    None,
2452                )
2453                .await
2454                .expect("themes with args succeeds");
2455            let text = first_text(&r);
2456            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
2457            assert!(v.is_array());
2458        });
2459        h.shutdown(&runtime);
2460    }
2461
2462    #[test]
2463    fn facts_about_rejects_empty_subject() {
2464        let runtime = rt();
2465        let h = Harness::new(&runtime);
2466        runtime.block_on(async {
2467            let err = h
2468                .server
2469                .dispatch_tool("memory_facts_about", json!({ "subject": "   " }), None)
2470                .await
2471                .expect_err("empty subject must error");
2472            // McpError doesn't expose a clean kind/message accessor; just
2473            // verify the error fires (validation path reached).
2474            let s = format!("{err:?}");
2475            assert!(
2476                s.to_lowercase().contains("subject") || s.to_lowercase().contains("invalid"),
2477                "got: {s}"
2478            );
2479        });
2480        h.shutdown(&runtime);
2481    }
2482
2483    #[test]
2484    fn facts_about_returns_array_for_unknown_subject() {
2485        let runtime = rt();
2486        let h = Harness::new(&runtime);
2487        runtime.block_on(async {
2488            let r = h
2489                .server
2490                .dispatch_tool(
2491                    "memory_facts_about",
2492                    json!({ "subject": "NobodyKnowsThisSubject" }),
2493                    None,
2494                )
2495                .await
2496                .expect("facts_about with unknown subject succeeds");
2497            let text = first_text(&r);
2498            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
2499            assert_eq!(v.as_array().unwrap().len(), 0);
2500        });
2501        h.shutdown(&runtime);
2502    }
2503
2504    #[test]
2505    fn facts_about_accepts_include_as_object_arg() {
2506        // Asserts the v0.5.1 P8 arg is parsed (serde default lets it
2507        // be omitted) and forwarded to the query lib without choking
2508        // the dispatcher. We don't seed triples — what we need to
2509        // verify is that the optional bool flows through. Both with
2510        // and without the arg, dispatch succeeds and returns an
2511        // empty array. (Functional coverage of the object-position
2512        // widening lives in the query-crate tests.)
2513        let runtime = rt();
2514        let h = Harness::new(&runtime);
2515        runtime.block_on(async {
2516            // With include_as_object=true.
2517            let r = h
2518                .server
2519                .dispatch_tool(
2520                    "memory_facts_about",
2521                    json!({ "subject": "Maya", "include_as_object": true }),
2522                    None,
2523                )
2524                .await
2525                .expect("dispatch with include_as_object=true succeeds");
2526            let v: serde_json::Value =
2527                serde_json::from_str(&first_text(&r)).expect("parses as json");
2528            assert_eq!(v.as_array().unwrap().len(), 0);
2529
2530            // Omitted entirely — must default to false (no error).
2531            let r = h
2532                .server
2533                .dispatch_tool("memory_facts_about", json!({ "subject": "Maya" }), None)
2534                .await
2535                .expect("dispatch without include_as_object succeeds (default false)");
2536            let v: serde_json::Value =
2537                serde_json::from_str(&first_text(&r)).expect("parses as json");
2538            assert_eq!(v.as_array().unwrap().len(), 0);
2539        });
2540        h.shutdown(&runtime);
2541    }
2542
2543    #[test]
2544    fn contradictions_returns_json_array_on_empty_db() {
2545        let runtime = rt();
2546        let h = Harness::new(&runtime);
2547        runtime.block_on(async {
2548            let r = h
2549                .server
2550                .dispatch_tool("memory_contradictions", json!({}), None)
2551                .await
2552                .expect("contradictions succeeds");
2553            let text = first_text(&r);
2554            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
2555            assert!(v.is_array());
2556            assert_eq!(v.as_array().unwrap().len(), 0);
2557        });
2558        h.shutdown(&runtime);
2559    }
2560
2561    #[test]
2562    fn entities_returns_matching_graph_entities() {
2563        let runtime = rt();
2564        let h = Harness::new(&runtime);
2565        {
2566            let conn = h.open_db();
2567            let (_memory_id, rowid) = seed_episode(&conn, "Alice graph seed");
2568            seed_triple_row(
2569                &conn,
2570                "t-mcp-entity-1",
2571                "Alice",
2572                "knows",
2573                "Bob",
2574                Some(rowid),
2575            );
2576        }
2577        runtime.block_on(async {
2578            let r = h
2579                .server
2580                .dispatch_tool("memory_entities", json!({ "query": "Ali" }), None)
2581                .await
2582                .expect("entities succeeds");
2583            let v: serde_json::Value =
2584                serde_json::from_str(&first_text(&r)).expect("parses as json");
2585            assert!(
2586                v.as_array()
2587                    .unwrap()
2588                    .iter()
2589                    .any(|row| row.get("entity_id").and_then(|id| id.as_str()) == Some("Alice")),
2590                "expected Alice entity, got {v}"
2591            );
2592        });
2593        h.shutdown(&runtime);
2594    }
2595
2596    #[test]
2597    fn contradiction_resolve_updates_lifecycle() {
2598        let runtime = rt();
2599        let h = Harness::new(&runtime);
2600        {
2601            let conn = h.open_db();
2602            let (_memory_id, rowid) = seed_episode(&conn, "contradiction seed");
2603            seed_triple_row(&conn, "t-mcp-a", "Alice", "likes", "tea", Some(rowid));
2604            seed_triple_row(&conn, "t-mcp-b", "Alice", "likes", "coffee", Some(rowid));
2605            seed_contradiction_row(&conn, "t-mcp-a", "t-mcp-b", "other");
2606        }
2607        runtime.block_on(async {
2608            let r = h
2609                .server
2610                .dispatch_tool(
2611                    "memory_contradiction_resolve",
2612                    json!({
2613                        "a_id": "t-mcp-a",
2614                        "b_id": "t-mcp-b",
2615                        "kind": "other",
2616                        "resolution_note": "tea is current",
2617                        "winning_triple_id": "t-mcp-a"
2618                    }),
2619                    None,
2620                )
2621                .await
2622                .expect("resolve succeeds");
2623            let resolved: serde_json::Value =
2624                serde_json::from_str(&first_text(&r)).expect("parses as json");
2625            assert_eq!(
2626                resolved.get("status").and_then(|v| v.as_str()),
2627                Some("resolved")
2628            );
2629            assert!(
2630                resolved
2631                    .get("resolved_at_ms")
2632                    .and_then(|v| v.as_i64())
2633                    .is_some()
2634            );
2635        });
2636        h.shutdown(&runtime);
2637    }
2638
2639    #[test]
2640    fn remember_then_recall_round_trip() {
2641        let runtime = rt();
2642        let h = Harness::new(&runtime);
2643        // Use &h.server directly (no clone) so the only outstanding
2644        // reference at shutdown time is the harness's own. The clone
2645        // path triggered a 5-second writer-thread timeout because the
2646        // local clone held an Arc<Inner> with its own WriteHandle past
2647        // h.shutdown().
2648        runtime.block_on(async {
2649            let r = h
2650                .server
2651                .dispatch_tool(
2652                    "memory_remember",
2653                    json!({ "content": "the cat sat on the mat" }),
2654                    None,
2655                )
2656                .await
2657                .expect("remember succeeds");
2658            let text = first_text(&r);
2659            assert!(text.starts_with("remembered "), "got: {text}");
2660
2661            let r = h
2662                .server
2663                .dispatch_tool(
2664                    "memory_recall",
2665                    json!({ "query": "the cat sat on the mat", "limit": 5 }),
2666                    None,
2667                )
2668                .await
2669                .expect("recall succeeds");
2670            let text = first_text(&r);
2671            assert!(text.contains("the cat sat on the mat"), "got: {text}");
2672        });
2673        h.shutdown(&runtime);
2674    }
2675
2676    #[test]
2677    fn update_rewrites_memory_content() {
2678        let runtime = rt();
2679        let h = Harness::new(&runtime);
2680        runtime.block_on(async {
2681            let r = h
2682                .server
2683                .dispatch_tool(
2684                    "memory_remember",
2685                    json!({ "content": "old mcp transport memory" }),
2686                    None,
2687                )
2688                .await
2689                .expect("remember succeeds");
2690            let text = first_text(&r);
2691            let mid = text
2692                .strip_prefix("remembered ")
2693                .expect("remembered prefix")
2694                .to_string();
2695
2696            let r = h
2697                .server
2698                .dispatch_tool(
2699                    "memory_update",
2700                    json!({
2701                        "memory_id": mid,
2702                        "content": "new mcp transport memory"
2703                    }),
2704                    None,
2705                )
2706                .await
2707                .expect("update succeeds");
2708            let updated: serde_json::Value =
2709                serde_json::from_str(&first_text(&r)).expect("parses as json");
2710            assert_eq!(
2711                updated.get("content").and_then(|v| v.as_str()),
2712                Some("new mcp transport memory")
2713            );
2714        });
2715        h.shutdown(&runtime);
2716    }
2717
2718    #[test]
2719    fn memory_context_returns_json_bundle() {
2720        let runtime = rt();
2721        let h = Harness::new(&runtime);
2722        runtime.block_on(async {
2723            h.server
2724                .dispatch_tool(
2725                    "memory_remember",
2726                    json!({ "content": "memory context round trip" }),
2727                    None,
2728                )
2729                .await
2730                .expect("remember succeeds");
2731
2732            let r = h
2733                .server
2734                .dispatch_tool(
2735                    "memory_context",
2736                    json!({ "query": "memory context", "limit": 5 }),
2737                    None,
2738                )
2739                .await
2740                .expect("memory_context succeeds");
2741            let text = first_text(&r);
2742            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
2743            assert_eq!(v["query"], "memory context");
2744            assert!(
2745                v["recall"]["hits"]
2746                    .as_array()
2747                    .unwrap()
2748                    .iter()
2749                    .any(|h| h["content"] == "memory context round trip"),
2750                "context recall should include remembered content: {v}"
2751            );
2752            assert!(v["themes"].is_array());
2753            assert!(v["facts"].is_array());
2754            assert!(v["contradictions"].is_array());
2755        });
2756        h.shutdown(&runtime);
2757    }
2758
2759    #[test]
2760    fn forget_excludes_row_from_subsequent_recall() {
2761        let runtime = rt();
2762        let h = Harness::new(&runtime);
2763
2764        runtime.block_on(async {
2765            let r = h
2766                .server
2767                .dispatch_tool(
2768                    "memory_remember",
2769                    json!({ "content": "to be forgotten" }),
2770                    None,
2771                )
2772                .await
2773                .unwrap();
2774            let text = first_text(&r);
2775            let mid = text.strip_prefix("remembered ").unwrap().to_string();
2776
2777            h.server
2778                .dispatch_tool(
2779                    "memory_forget",
2780                    json!({ "memory_id": mid, "reason": "test" }),
2781                    None,
2782                )
2783                .await
2784                .expect("forget succeeds");
2785
2786            let r = h
2787                .server
2788                .dispatch_tool(
2789                    "memory_recall",
2790                    json!({ "query": "to be forgotten", "limit": 5 }),
2791                    None,
2792                )
2793                .await
2794                .unwrap();
2795            let text = first_text(&r);
2796            assert!(
2797                !text.contains(r#""content": "to be forgotten""#),
2798                "forgotten row should be excluded; got: {text}"
2799            );
2800        });
2801        h.shutdown(&runtime);
2802    }
2803
2804    #[test]
2805    fn empty_remember_returns_invalid_params() {
2806        let runtime = rt();
2807        let h = Harness::new(&runtime);
2808        runtime.block_on(async {
2809            let err = h
2810                .server
2811                .dispatch_tool("memory_remember", json!({ "content": "" }), None)
2812                .await
2813                .unwrap_err();
2814            assert!(format!("{err:?}").contains("must not be empty"));
2815        });
2816        h.shutdown(&runtime);
2817    }
2818
2819    #[test]
2820    fn empty_recall_query_returns_invalid_params() {
2821        let runtime = rt();
2822        let h = Harness::new(&runtime);
2823        runtime.block_on(async {
2824            let err = h
2825                .server
2826                .dispatch_tool("memory_recall", json!({ "query": "   " }), None)
2827                .await
2828                .unwrap_err();
2829            assert!(format!("{err:?}").contains("must not be empty"));
2830        });
2831        h.shutdown(&runtime);
2832    }
2833
2834    #[test]
2835    fn inspect_with_invalid_id_returns_invalid_params() {
2836        let runtime = rt();
2837        let h = Harness::new(&runtime);
2838        runtime.block_on(async {
2839            let err = h
2840                .server
2841                .dispatch_tool("memory_inspect", json!({ "memory_id": "not-a-uuid" }), None)
2842                .await
2843                .unwrap_err();
2844            assert!(format!("{err:?}").contains("invalid memory_id"));
2845        });
2846        h.shutdown(&runtime);
2847    }
2848
2849    #[test]
2850    fn forget_unknown_id_returns_invalid_params() {
2851        let runtime = rt();
2852        let h = Harness::new(&runtime);
2853        runtime.block_on(async {
2854            // Valid UUID format but not in episodes — handle_forget
2855            // surfaces NotFound, mapped to invalid_params per
2856            // solo_to_mcp.
2857            let err = h
2858                .server
2859                .dispatch_tool(
2860                    "memory_forget",
2861                    json!({ "memory_id": "00000000-0000-7000-8000-000000000000" }),
2862                    None,
2863                )
2864                .await
2865                .unwrap_err();
2866            assert!(format!("{err:?}").contains("not found"));
2867        });
2868        h.shutdown(&runtime);
2869    }
2870
2871    #[test]
2872    fn unknown_tool_name_returns_invalid_params() {
2873        let runtime = rt();
2874        let h = Harness::new(&runtime);
2875        runtime.block_on(async {
2876            let err = h
2877                .server
2878                .dispatch_tool("memory.summon", json!({}), None)
2879                .await
2880                .unwrap_err();
2881            assert!(format!("{err:?}").contains("unknown tool"));
2882        });
2883        h.shutdown(&runtime);
2884    }
2885
2886    /// Regression guard for v0.4.1's MCP tool name fix, generalised
2887    /// in v0.5.0 Priority 4 to cover **all three** major LLM
2888    /// providers, not just Anthropic.
2889    ///
2890    /// Each provider enforces its own tool-name regex on the
2891    /// function-calling wire. A tool name has to satisfy ALL of them
2892    /// to be portable across clients:
2893    ///
2894    ///   - **Anthropic**: `^[a-zA-Z0-9_-]{1,64}$` (what shipped in
2895    ///     v0.4.1; failing this rejects the entire toolset on Claude
2896    ///     Desktop / Cursor / Claude Code with
2897    ///     `FrontendRemoteMcpToolDefinition.name: String should
2898    ///     match pattern ...`).
2899    ///   - **OpenAI** function-calling: `^[a-zA-Z_][a-zA-Z0-9_-]*$`
2900    ///     with length ≤ 64 (must start with letter or underscore).
2901    ///   - **Gemini** function-calling: documented as a-z, A-Z, 0-9,
2902    ///     underscores and dashes; some sources also allow dots. We
2903    ///     use the conservative intersection — must start with
2904    ///     letter or underscore, alphanumeric + underscore only (no
2905    ///     hyphen, no dot), length ≤ 63. This is the strictest of
2906    ///     the three patterns, so any tool that passes it also
2907    ///     passes the other two. Sources differ on whether Gemini
2908    ///     accepts dots or hyphens; the strictest reading guards us
2909    ///     against the future where one provider tightens the regex
2910    ///     (which is the failure mode v0.4.1 hit on Anthropic). See
2911    ///     <https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/docs/api/google/generativeai/protos/FunctionDeclaration.md>
2912    ///     and <https://ai.google.dev/gemini-api/docs/function-calling>.
2913    ///
2914    /// Lesson banked v0.3 #8: rmcp framing tests pass dot-named
2915    /// tools fine because rmcp's own client-side validation is
2916    /// permissive. Only the downstream provider API enforces the
2917    /// regex. This test gates the names at `cargo test` time so any
2918    /// future tool-name change has to pass all three provider
2919    /// regexes before reaching real clients.
2920    #[test]
2921    fn tool_names_match_cross_provider_regex() {
2922        /// Anthropic API name regex: `^[a-zA-Z0-9_-]{1,64}$`.
2923        fn passes_anthropic(name: &str) -> bool {
2924            let len = name.len();
2925            if !(1..=64).contains(&len) {
2926                return false;
2927            }
2928            name.chars()
2929                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
2930        }
2931
2932        /// OpenAI function-calling name regex:
2933        /// `^[a-zA-Z_][a-zA-Z0-9_-]*$`, length ≤ 64.
2934        fn passes_openai(name: &str) -> bool {
2935            let len = name.len();
2936            if !(1..=64).contains(&len) {
2937                return false;
2938            }
2939            let mut chars = name.chars();
2940            let first = match chars.next() {
2941                Some(c) => c,
2942                None => return false,
2943            };
2944            if !(first.is_ascii_alphabetic() || first == '_') {
2945                return false;
2946            }
2947            chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
2948        }
2949
2950        /// Gemini function-calling name regex (conservative
2951        /// reading): `^[a-zA-Z_][a-zA-Z0-9_]*$`, length ≤ 63. No
2952        /// hyphen, no dot — strictest of the three so any name that
2953        /// passes this passes the other two.
2954        fn passes_gemini(name: &str) -> bool {
2955            let len = name.len();
2956            if !(1..=63).contains(&len) {
2957                return false;
2958            }
2959            let mut chars = name.chars();
2960            let first = match chars.next() {
2961                Some(c) => c,
2962                None => return false,
2963            };
2964            if !(first.is_ascii_alphabetic() || first == '_') {
2965                return false;
2966            }
2967            chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
2968        }
2969
2970        let tools = build_tools();
2971        assert_eq!(
2972            tools.len(),
2973            18,
2974            "expected 18 tools (context + update/entities/resolve + v0.5.x + document tools + remember_batch)"
2975        );
2976        // Sanity-check that tool_names() agrees with build_tools().
2977        let tool_name_strings: Vec<String> = tools.iter().map(|t| t.name.to_string()).collect();
2978        let public_names: Vec<String> = super::tool_names().iter().map(|s| s.to_string()).collect();
2979        assert_eq!(
2980            tool_name_strings, public_names,
2981            "tool_names() drifted from build_tools() — keep them in sync"
2982        );
2983
2984        for t in tools {
2985            assert!(
2986                passes_anthropic(&t.name),
2987                "tool name {:?} fails Anthropic regex \
2988                 ^[a-zA-Z0-9_-]{{1,64}}$ — see v0.3 lesson #8",
2989                t.name
2990            );
2991            assert!(
2992                passes_openai(&t.name),
2993                "tool name {:?} fails OpenAI function-calling regex \
2994                 ^[a-zA-Z_][a-zA-Z0-9_-]*$ (len ≤ 64)",
2995                t.name
2996            );
2997            assert!(
2998                passes_gemini(&t.name),
2999                "tool name {:?} fails Gemini function-calling regex \
3000                 ^[a-zA-Z_][a-zA-Z0-9_]*$ (len ≤ 63, strict)",
3001                t.name
3002            );
3003        }
3004    }
3005
3006    /// Regression guard (dev-log 0152 finding M3): the
3007    /// `memory_remember_batch` JSON Schema's `items.maxItems` must equal
3008    /// the runtime cap `solo_storage::MAX_REMEMBER_BATCH_SIZE`. The
3009    /// schema is now derived from the constant, but pin the literal so a
3010    /// future drift (someone hard-codes `200` again) is caught.
3011    #[test]
3012    fn remember_batch_maxitems_matches_max_batch_size() {
3013        let tools = build_tools();
3014        let batch = tools
3015            .iter()
3016            .find(|t| t.name == "memory_remember_batch")
3017            .expect("memory_remember_batch tool is missing");
3018        let schema = serde_json::to_value(&batch.input_schema)
3019            .expect("input_schema serialises as JSON");
3020        let max_items = schema
3021            .get("properties")
3022            .and_then(|p| p.get("items"))
3023            .and_then(|i| i.get("maxItems"))
3024            .and_then(|n| n.as_u64())
3025            .expect("memory_remember_batch.items.maxItems missing or not a u64");
3026        assert_eq!(
3027            max_items as usize,
3028            solo_storage::MAX_REMEMBER_BATCH_SIZE,
3029            "memory_remember_batch schema maxItems ({}) must equal \
3030             solo_storage::MAX_REMEMBER_BATCH_SIZE ({}). If the cap \
3031             changed, update both — but you should never need to: the \
3032             schema now interpolates the constant directly.",
3033            max_items,
3034            solo_storage::MAX_REMEMBER_BATCH_SIZE,
3035        );
3036    }
3037
3038    /// Regression guard for the v0.5.0 Priority 4 jargon pass.
3039    ///
3040    /// Tool descriptions and `get_info().instructions` are the first
3041    /// (and often only) thing a calling LLM reads when its
3042    /// tool-search mechanism decides whether Solo's tools are
3043    /// relevant. Earlier descriptions leaned on Solo-internal
3044    /// vocabulary (`SPO`, `Steward`, `LEFT JOIN`, `candidate pair`,
3045    /// `tagged_with`) which doesn't pattern-match natural-language
3046    /// agent queries like "what do you know about Alex?" — that's
3047    /// the load-bearing v0.5.0 finding from the 2026-05-14
3048    /// thesis-test in Claude Desktop.
3049    ///
3050    /// This test pins the de-jargoning by forbidding the old
3051    /// vocabulary from appearing in any user-facing text. Future
3052    /// contributors who reach for jargon trip the test and have to
3053    /// pick plain-English phrasing instead.
3054    #[test]
3055    fn tool_descriptions_avoid_internal_jargon() {
3056        // Case-insensitive substring match. Drawn from the
3057        // pre-Priority-4 descriptions; expand only if a new term
3058        // creeps in.
3059        const FORBIDDEN: &[&str] = &[
3060            "SPO",
3061            "Steward",
3062            "Steward-flagged",
3063            "LEFT JOIN",
3064            "candidate pair",
3065            "candidate_pair",
3066            "tagged_with",
3067        ];
3068
3069        fn contains_case_insensitive(haystack: &str, needle: &str) -> bool {
3070            haystack.to_lowercase().contains(&needle.to_lowercase())
3071        }
3072
3073        // 1. Each tool description.
3074        for t in build_tools() {
3075            let desc = t.description.as_deref().unwrap_or("");
3076            for term in FORBIDDEN {
3077                assert!(
3078                    !contains_case_insensitive(desc, term),
3079                    "tool {:?} description contains forbidden jargon \
3080                     {:?} — rewrite in plain English (see v0.5.0 \
3081                     Priority 4)",
3082                    t.name,
3083                    term,
3084                );
3085            }
3086        }
3087
3088        // 2. The server-level instructions (what tool-search sees
3089        // first).
3090        let server_info = harness_server_info();
3091        let instructions = server_info
3092            .instructions
3093            .as_deref()
3094            .expect("get_info() must set instructions");
3095        for term in FORBIDDEN {
3096            assert!(
3097                !contains_case_insensitive(instructions, term),
3098                "get_info().instructions contains forbidden jargon \
3099                 {:?} — rewrite in plain English",
3100                term,
3101            );
3102        }
3103    }
3104
3105    /// Build a `ServerInfo` for the jargon test without spinning up
3106    /// the full harness (which needs tokio + tempdir). The
3107    /// `ServerHandler::get_info()` method doesn't take `&self` state
3108    /// in any meaningful way for our impl — it returns a static
3109    /// `ServerInfo` literal — so we construct a minimal-input server
3110    /// just to call it.
3111    fn harness_server_info() -> rmcp::model::ServerInfo {
3112        let runtime = rt();
3113        let h = Harness::new(&runtime);
3114        let info = ServerHandler::get_info(&h.server);
3115        h.shutdown(&runtime);
3116        info
3117    }
3118
3119    /// Regression guard for the v0.9.0 → v0.9.1 P1 Fix 1 MCP
3120    /// `serverInfo` identity regression.
3121    ///
3122    /// In v0.9.0, P0a's rmcp 0.1.5 → 1.7 bump replaced the explicit
3123    /// `Implementation::new("solo", "<version>")` constructor with
3124    /// `Implementation::from_build_env()`. That helper reads
3125    /// `CARGO_PKG_NAME` + `CARGO_PKG_VERSION` from **rmcp's own** build
3126    /// environment (the proc-macro expansion captures rmcp's
3127    /// `Cargo.toml`, not the consumer's). Every Solo MCP daemon on
3128    /// v0.9.0 self-identified as `{name: "rmcp", version: "1.7.0"}`
3129    /// instead of `{name: "solo", version: "<workspace.version>"}`.
3130    ///
3131    /// Pins:
3132    ///   - `name == "solo"` (the operator-facing binary name, not
3133    ///     `"solo-api"` which would come from
3134    ///     `env!("CARGO_PKG_NAME")` against this crate's manifest);
3135    ///   - `version == env!("CARGO_PKG_VERSION")` from solo-api's own
3136    ///     compile environment (this is the workspace.package version
3137    ///     via inheritance, so it stays in sync with `solo --version`
3138    ///     and `solo-cli`'s identity).
3139    #[test]
3140    fn server_info_identity_is_solo_not_rmcp_or_solo_api() {
3141        let info = harness_server_info();
3142        let name = info.server_info.name.as_str();
3143        let version = info.server_info.version.as_str();
3144        assert_eq!(
3145            name, "solo",
3146            "MCP serverInfo.name must be \"solo\" (not \"rmcp\" or \
3147             \"solo-api\"). got name={name:?} version={version:?}"
3148        );
3149        assert_eq!(
3150            version,
3151            env!("CARGO_PKG_VERSION"),
3152            "MCP serverInfo.version must match solo-api's compile-time \
3153             CARGO_PKG_VERSION (i.e. the workspace.package version); \
3154             a mismatch means we regressed back to rmcp's build env. \
3155             got version={version:?}"
3156        );
3157    }
3158
3159    // ---- memory_inspect_cluster (v0.5.0 Priority 3) ----
3160
3161    #[test]
3162    fn inspect_cluster_unknown_id_returns_invalid_params() {
3163        // NotFound from solo_query::inspect_cluster is mapped through
3164        // `solo_to_mcp` to `invalid_params` (MCP has no separate
3165        // not-found error shape). Error message should name the id.
3166        let runtime = rt();
3167        let h = Harness::new(&runtime);
3168        runtime.block_on(async {
3169            let err = h
3170                .server
3171                .dispatch_tool(
3172                    "memory_inspect_cluster",
3173                    json!({ "cluster_id": "no-such-cluster" }),
3174                    None,
3175                )
3176                .await
3177                .expect_err("unknown cluster must error");
3178            let s = format!("{err:?}");
3179            assert!(
3180                s.contains("no-such-cluster") || s.to_lowercase().contains("not found"),
3181                "expected error to mention the missing cluster id; got: {s}"
3182            );
3183        });
3184        h.shutdown(&runtime);
3185    }
3186
3187    #[test]
3188    fn inspect_cluster_rejects_empty_id() {
3189        let runtime = rt();
3190        let h = Harness::new(&runtime);
3191        runtime.block_on(async {
3192            let err = h
3193                .server
3194                .dispatch_tool(
3195                    "memory_inspect_cluster",
3196                    json!({ "cluster_id": "   " }),
3197                    None,
3198                )
3199                .await
3200                .expect_err("blank cluster_id must error");
3201            let s = format!("{err:?}");
3202            assert!(
3203                s.to_lowercase().contains("cluster_id")
3204                    || s.to_lowercase().contains("must not be empty"),
3205                "got: {s}"
3206            );
3207        });
3208        h.shutdown(&runtime);
3209    }
3210
3211    // ---- Document tools (v0.7.0 P5) ----
3212    //
3213    // The five document handlers each have two arg-shape tests:
3214    //   - arg-struct parses from JSON (serde round-trip; defaults work).
3215    //   - dispatch arm routes to the handler (we observe behaviour via
3216    //     a known empty-DB response — bad routing surfaces as
3217    //     "unknown tool" or wrong shape).
3218    //
3219    // Functional coverage (ingest → search → inspect → forget) lives in
3220    // `crates/solo-cli/tests/mcp_smoke.rs` where a real subprocess + real
3221    // writer-with-embedder is wired up. The in-process Harness here uses
3222    // `WriterActor::spawn` which doesn't carry an embedder, so ingest /
3223    // search themselves return an error — but the dispatch + arg-parse
3224    // paths exercise correctly.
3225
3226    #[test]
3227    fn ingest_document_args_parse_with_required_path() {
3228        let v: IngestDocumentArgs =
3229            serde_json::from_value(json!({ "path": "/tmp/notes.md" })).expect("parses");
3230        assert_eq!(v.path, "/tmp/notes.md");
3231        // path is required — missing must reject at deserialization.
3232        let err = serde_json::from_value::<IngestDocumentArgs>(json!({})).unwrap_err();
3233        assert!(format!("{err}").contains("path"));
3234    }
3235
3236    #[test]
3237    fn search_docs_args_parse_with_default_limit() {
3238        let v: SearchDocsArgs =
3239            serde_json::from_value(json!({ "query": "backups" })).expect("parses");
3240        assert_eq!(v.query, "backups");
3241        assert_eq!(v.limit, 5, "default limit must be 5");
3242        let v: SearchDocsArgs =
3243            serde_json::from_value(json!({ "query": "backups", "limit": 20 })).expect("parses");
3244        assert_eq!(v.limit, 20);
3245    }
3246
3247    #[test]
3248    fn inspect_document_args_parse_with_required_doc_id() {
3249        let v: InspectDocumentArgs =
3250            serde_json::from_value(json!({ "doc_id": "abc" })).expect("parses");
3251        assert_eq!(v.doc_id, "abc");
3252        let err = serde_json::from_value::<InspectDocumentArgs>(json!({})).unwrap_err();
3253        assert!(format!("{err}").contains("doc_id"));
3254    }
3255
3256    #[test]
3257    fn list_documents_args_parse_with_all_defaults() {
3258        let v: ListDocumentsArgs = serde_json::from_value(json!({})).expect("parses");
3259        assert_eq!(v.limit, 20, "default limit must be 20");
3260        assert_eq!(v.offset, 0, "default offset must be 0");
3261        assert!(
3262            !v.include_forgotten,
3263            "default include_forgotten must be false"
3264        );
3265        let v: ListDocumentsArgs =
3266            serde_json::from_value(json!({ "limit": 5, "offset": 10, "include_forgotten": true }))
3267                .expect("parses");
3268        assert_eq!(v.limit, 5);
3269        assert_eq!(v.offset, 10);
3270        assert!(v.include_forgotten);
3271    }
3272
3273    #[test]
3274    fn forget_document_args_parse_with_required_doc_id() {
3275        let v: ForgetDocumentArgs =
3276            serde_json::from_value(json!({ "doc_id": "abc" })).expect("parses");
3277        assert_eq!(v.doc_id, "abc");
3278        let err = serde_json::from_value::<ForgetDocumentArgs>(json!({})).unwrap_err();
3279        assert!(format!("{err}").contains("doc_id"));
3280    }
3281
3282    #[test]
3283    fn ingest_document_rejects_empty_path() {
3284        // Reaches the dispatch arm → handle_ingest_document → empty
3285        // guard fires before the writer is touched. Proves routing.
3286        let runtime = rt();
3287        let h = Harness::new(&runtime);
3288        runtime.block_on(async {
3289            let err = h
3290                .server
3291                .dispatch_tool("memory_ingest_document", json!({ "path": "" }), None)
3292                .await
3293                .expect_err("empty path must error");
3294            let s = format!("{err:?}");
3295            assert!(
3296                s.to_lowercase().contains("path") || s.to_lowercase().contains("must not be empty"),
3297                "got: {s}"
3298            );
3299        });
3300        h.shutdown(&runtime);
3301    }
3302
3303    #[test]
3304    fn search_docs_rejects_empty_query() {
3305        // Empty query trips solo_query::run_doc_search's validation
3306        // → InvalidInput → invalid_params.
3307        let runtime = rt();
3308        let h = Harness::new(&runtime);
3309        runtime.block_on(async {
3310            let err = h
3311                .server
3312                .dispatch_tool("memory_search_docs", json!({ "query": "   " }), None)
3313                .await
3314                .expect_err("empty query must error");
3315            let s = format!("{err:?}");
3316            assert!(
3317                s.to_lowercase().contains("must not be empty")
3318                    || s.to_lowercase().contains("invalid"),
3319                "got: {s}"
3320            );
3321        });
3322        h.shutdown(&runtime);
3323    }
3324
3325    #[test]
3326    fn inspect_document_unknown_id_returns_invalid_params() {
3327        // Valid UUID format but no row exists → handler returns
3328        // invalid_params with the missing id in the message.
3329        let runtime = rt();
3330        let h = Harness::new(&runtime);
3331        runtime.block_on(async {
3332            let err = h
3333                .server
3334                .dispatch_tool(
3335                    "memory_inspect_document",
3336                    json!({ "doc_id": "00000000-0000-7000-8000-000000000000" }),
3337                    None,
3338                )
3339                .await
3340                .expect_err("unknown doc must error");
3341            let s = format!("{err:?}");
3342            assert!(
3343                s.to_lowercase().contains("not found"),
3344                "expected 'not found' message; got: {s}"
3345            );
3346        });
3347        h.shutdown(&runtime);
3348    }
3349
3350    #[test]
3351    fn inspect_document_rejects_malformed_id() {
3352        let runtime = rt();
3353        let h = Harness::new(&runtime);
3354        runtime.block_on(async {
3355            let err = h
3356                .server
3357                .dispatch_tool(
3358                    "memory_inspect_document",
3359                    json!({ "doc_id": "not-a-uuid" }),
3360                    None,
3361                )
3362                .await
3363                .expect_err("malformed doc_id must error");
3364            let s = format!("{err:?}");
3365            assert!(s.contains("invalid doc_id"), "got: {s}");
3366        });
3367        h.shutdown(&runtime);
3368    }
3369
3370    #[test]
3371    fn list_documents_returns_empty_array_on_empty_db() {
3372        let runtime = rt();
3373        let h = Harness::new(&runtime);
3374        runtime.block_on(async {
3375            let r = h
3376                .server
3377                .dispatch_tool("memory_list_documents", json!({}), None)
3378                .await
3379                .expect("list succeeds");
3380            let text = first_text(&r);
3381            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
3382            assert!(v.is_array(), "expected array, got: {text}");
3383            assert_eq!(v.as_array().unwrap().len(), 0);
3384        });
3385        h.shutdown(&runtime);
3386    }
3387
3388    #[test]
3389    fn list_documents_passes_through_limit_offset_include_args() {
3390        let runtime = rt();
3391        let h = Harness::new(&runtime);
3392        runtime.block_on(async {
3393            let r = h
3394                .server
3395                .dispatch_tool(
3396                    "memory_list_documents",
3397                    json!({ "limit": 5, "offset": 10, "include_forgotten": true }),
3398                    None,
3399                )
3400                .await
3401                .expect("list with args succeeds");
3402            let text = first_text(&r);
3403            let v: serde_json::Value = serde_json::from_str(&text).expect("parses as json");
3404            assert!(v.is_array());
3405        });
3406        h.shutdown(&runtime);
3407    }
3408
3409    #[test]
3410    fn forget_document_rejects_malformed_id() {
3411        let runtime = rt();
3412        let h = Harness::new(&runtime);
3413        runtime.block_on(async {
3414            let err = h
3415                .server
3416                .dispatch_tool(
3417                    "memory_forget_document",
3418                    json!({ "doc_id": "not-a-uuid" }),
3419                    None,
3420                )
3421                .await
3422                .expect_err("malformed doc_id must error");
3423            let s = format!("{err:?}");
3424            assert!(s.contains("invalid doc_id"), "got: {s}");
3425        });
3426        h.shutdown(&runtime);
3427    }
3428
3429    // -----------------------------------------------------------------
3430    // v0.9.2 — `memory_remember_batch` + `salience` MCP layer tests.
3431    // -----------------------------------------------------------------
3432
3433    /// salience round-trip through `memory_remember`: an explicit
3434    /// in-range value reaches the writer; an absent value defaults
3435    /// to 0.5; an out-of-range value is rejected with invalid_params.
3436    #[test]
3437    fn remember_with_explicit_salience_round_trips() {
3438        let runtime = rt();
3439        let h = Harness::new(&runtime);
3440        runtime.block_on(async {
3441            let r = h
3442                .server
3443                .dispatch_tool(
3444                    "memory_remember",
3445                    json!({ "content": "with salience", "salience": 0.83 }),
3446                    None,
3447                )
3448                .await
3449                .expect("remember w/ salience succeeds");
3450            let text = first_text(&r);
3451            // Confirmation includes the new MemoryId.
3452            assert!(text.starts_with("remembered "), "got: {text}");
3453        });
3454        h.shutdown(&runtime);
3455    }
3456
3457    #[test]
3458    fn remember_with_out_of_range_salience_returns_invalid_params() {
3459        let runtime = rt();
3460        let h = Harness::new(&runtime);
3461        runtime.block_on(async {
3462            let err = h
3463                .server
3464                .dispatch_tool(
3465                    "memory_remember",
3466                    json!({ "content": "out of range", "salience": 1.5 }),
3467                    None,
3468                )
3469                .await
3470                .unwrap_err();
3471            let s = format!("{err:?}");
3472            assert!(s.contains("salience must be"), "got: {s}");
3473        });
3474        h.shutdown(&runtime);
3475    }
3476
3477    /// Salience boundary: 0.0 and 1.0 are both valid (inclusive range).
3478    #[test]
3479    fn remember_with_boundary_salience_succeeds() {
3480        let runtime = rt();
3481        let h = Harness::new(&runtime);
3482        runtime.block_on(async {
3483            for s in [0.0_f64, 1.0_f64] {
3484                let r = h
3485                    .server
3486                    .dispatch_tool(
3487                        "memory_remember",
3488                        json!({ "content": format!("boundary-{s}"), "salience": s }),
3489                        None,
3490                    )
3491                    .await
3492                    .expect("boundary salience succeeds");
3493                assert!(first_text(&r).starts_with("remembered "));
3494            }
3495        });
3496        h.shutdown(&runtime);
3497    }
3498
3499    /// Happy-path batch: 3 items go in, 3 memory_ids come out in order.
3500    #[test]
3501    fn remember_batch_returns_ids_in_order() {
3502        let runtime = rt();
3503        let h = Harness::new(&runtime);
3504        runtime.block_on(async {
3505            let items = json!([
3506                { "content": "batch-a" },
3507                { "content": "batch-b", "source_type": "user_preference", "salience": 0.9 },
3508                { "content": "batch-c", "salience": 0.1 },
3509            ]);
3510            let r = h
3511                .server
3512                .dispatch_tool("memory_remember_batch", json!({ "items": items }), None)
3513                .await
3514                .expect("batch succeeds");
3515            let text = first_text(&r);
3516            let parsed: serde_json::Value = serde_json::from_str(&text).expect("reply is JSON");
3517            let arr = parsed.as_array().expect("reply is array");
3518            assert_eq!(arr.len(), 3, "3 items in → 3 ids out: {text}");
3519            // Each entry must be a UUID-shaped string.
3520            for v in arr {
3521                let s = v.as_str().unwrap_or_else(|| panic!("non-string id: {v}"));
3522                assert_eq!(s.len(), 36, "UUID-shaped id expected: {s}");
3523            }
3524            // Distinct ids.
3525            let mut ids: Vec<&str> = arr.iter().map(|v| v.as_str().unwrap()).collect();
3526            ids.sort();
3527            ids.dedup();
3528            assert_eq!(ids.len(), 3, "ids must be distinct: {text}");
3529        });
3530        h.shutdown(&runtime);
3531    }
3532
3533    /// Empty items → invalid_params before any embedding work.
3534    #[test]
3535    fn remember_batch_empty_items_returns_invalid_params() {
3536        let runtime = rt();
3537        let h = Harness::new(&runtime);
3538        runtime.block_on(async {
3539            let err = h
3540                .server
3541                .dispatch_tool("memory_remember_batch", json!({ "items": [] }), None)
3542                .await
3543                .unwrap_err();
3544            let s = format!("{err:?}");
3545            assert!(s.contains("must not be empty"), "got: {s}");
3546        });
3547        h.shutdown(&runtime);
3548    }
3549
3550    /// Per-item validation: empty content trips invalid_params with the
3551    /// index of the offending item baked into the message.
3552    #[test]
3553    fn remember_batch_rejects_per_item_empty_content() {
3554        let runtime = rt();
3555        let h = Harness::new(&runtime);
3556        runtime.block_on(async {
3557            let items = json!([
3558                { "content": "ok-1" },
3559                { "content": "   " },
3560                { "content": "ok-3" },
3561            ]);
3562            let err = h
3563                .server
3564                .dispatch_tool("memory_remember_batch", json!({ "items": items }), None)
3565                .await
3566                .unwrap_err();
3567            let s = format!("{err:?}");
3568            assert!(s.contains("items[1]"), "must mention items[1]: {s}");
3569            assert!(s.contains("must not be empty"), "got: {s}");
3570        });
3571        h.shutdown(&runtime);
3572    }
3573
3574    /// Per-item validation: out-of-range salience trips invalid_params
3575    /// with the item index in the message.
3576    #[test]
3577    fn remember_batch_rejects_per_item_salience_out_of_range() {
3578        let runtime = rt();
3579        let h = Harness::new(&runtime);
3580        runtime.block_on(async {
3581            let items = json!([
3582                { "content": "ok-1", "salience": 0.5 },
3583                { "content": "out-of-range", "salience": -0.1 },
3584            ]);
3585            let err = h
3586                .server
3587                .dispatch_tool("memory_remember_batch", json!({ "items": items }), None)
3588                .await
3589                .unwrap_err();
3590            let s = format!("{err:?}");
3591            assert!(s.contains("items[1]"), "must mention items[1]: {s}");
3592            assert!(s.contains("salience must be"), "got: {s}");
3593        });
3594        h.shutdown(&runtime);
3595    }
3596
/// A batch holding one item more than
/// `solo_storage::MAX_REMEMBER_BATCH_SIZE` must be rejected, and the
/// error text must name the cap constant.
#[test]
fn remember_batch_over_cap_returns_invalid_params() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        // Indices 0..=CAP give exactly CAP + 1 items.
        let mut items: Vec<serde_json::Value> = Vec::new();
        for i in 0..=solo_storage::MAX_REMEMBER_BATCH_SIZE {
            items.push(json!({ "content": format!("over-{i}") }));
        }
        let args = json!({ "items": items });
        let outcome = h
            .server
            .dispatch_tool("memory_remember_batch", args, None)
            .await;
        let s = format!("{:?}", outcome.unwrap_err());
        assert!(
            s.contains("MAX_REMEMBER_BATCH_SIZE"),
            "must mention the cap: {s}"
        );
    });
    h.shutdown(&runtime);
}
3620
3621    // -----------------------------------------------------------------
3622    // v0.11.0 P3: per-tool progress event tests.
3623    //
3624    // These tests invoke `dispatch_tool` with a real
3625    // `ProgressReporter` wired to a fresh `SessionState`, then drain
3626    // the session's broadcast receiver to observe the emitted events.
3627    // The pattern mirrors `mcp_progress::tests::progress_reporter_*`
3628    // but exercises the full handler call stack (including the writer
3629    // and query pipelines) end-to-end.
3630    // -----------------------------------------------------------------
3631
3632    use crate::mcp_progress::{ProgressReporter, ProgressToken};
3633    use crate::mcp_session::SessionState;
3634    use std::sync::Arc as StdArc2;
3635
3636    fn fresh_progress_session() -> StdArc2<SessionState> {
3637        StdArc2::new(SessionState::new(
3638            solo_core::TenantId::default_tenant(),
3639            None,
3640        ))
3641    }
3642
3643    fn drain_progress_events(
3644        rx: &mut tokio::sync::broadcast::Receiver<crate::mcp_session::McpStreamEvent>,
3645    ) -> Vec<crate::mcp_session::McpStreamEvent> {
3646        let mut out = Vec::new();
3647        while let Ok(ev) = rx.try_recv() {
3648            out.push(ev);
3649        }
3650        out
3651    }
3652
// v0.11.0 P3 note: the `ingest_document_emits_progress_at_*` tests live
// in `http::handler_tests`. The dispatch_tests harness uses
// `WriterActor::spawn` (no embedder), so an end-to-end ingest here
// panics with "writer has no embedder". The handler_tests harness uses
// `WriterActor::spawn_full`, which carries an embedder, so the ingest
// progress checkpoints are exercised there.
3659
/// v0.11.0 P3: with a reporter wired and `limit` = 150 (above the 100
/// threshold), `memory_search_docs` publishes exactly three progress
/// events that echo the token and count 1, 2, 3 out of a total of 3.
#[test]
fn search_docs_emits_progress_only_when_top_k_above_100() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        let session = fresh_progress_session();
        // Subscribe before the call so nothing published is missed.
        let mut rx = session.subscribe_events();
        let token = ProgressToken(json!(42));
        let reporter = ProgressReporter::new(session.clone(), token);
        let args = json!({ "query": "anything", "limit": 150 });
        let _response = h
            .server
            .dispatch_tool("memory_search_docs", args, Some(reporter))
            .await
            .expect("search succeeds");
        let events = drain_progress_events(&mut rx);
        assert_eq!(
            events.len(),
            3,
            "expected 3 search progress events at top_k=150, got {}",
            events.len()
        );
        // Spec shape: each event carries progressToken (the number 42),
        // a total of 3, and a 1-based progress counter.
        for (step, ev) in (1u64..).zip(events.iter()) {
            let params = &ev.data["params"];
            assert_eq!(params["progressToken"], json!(42));
            assert_eq!(params["total"], json!(3));
            assert_eq!(params["progress"], json!(step));
        }
    });
    h.shutdown(&runtime);
}
3697
/// v0.11.0 P3: with a reporter wired but `limit` = 50 (not above the
/// 100 threshold), `memory_search_docs` must publish no progress
/// events. Threshold gating per Decision C.
#[test]
fn search_docs_emits_no_progress_when_top_k_below_threshold() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        let session = fresh_progress_session();
        let mut rx = session.subscribe_events();
        let token = ProgressToken(json!("t"));
        let reporter = ProgressReporter::new(session.clone(), token);
        let args = json!({ "query": "anything", "limit": 50 });
        let _response = h
            .server
            .dispatch_tool("memory_search_docs", args, Some(reporter))
            .await
            .expect("search succeeds");
        // Whatever reached the session would be visible here.
        let events = drain_progress_events(&mut rx);
        assert!(
            events.is_empty(),
            "expected no progress events at top_k=50, got {events:?}"
        );
    });
    h.shutdown(&runtime);
}
3726
/// v0.11.0 P3: a `memory_remember_batch` call with more than 50 items
/// reports embed progress every 25 items, then a final "embedded" and
/// an "inserted" event. For 51 items the expected progress values are
/// 25, 50, 51 (embedded), 51 (inserted): four events in total.
#[test]
fn remember_batch_emits_progress_only_when_size_above_50() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        let session = fresh_progress_session();
        let mut rx = session.subscribe_events();
        let token = ProgressToken(json!("batch"));
        let reporter = ProgressReporter::new(session.clone(), token);
        // 51 items: one above the threshold.
        let mut items: Vec<serde_json::Value> = Vec::new();
        for i in 0..51 {
            items.push(json!({ "content": format!("item-{i}") }));
        }
        let args = json!({ "items": items });
        let _response = h
            .server
            .dispatch_tool("memory_remember_batch", args, Some(reporter))
            .await
            .expect("batch succeeds");
        let events = drain_progress_events(&mut rx);
        assert_eq!(
            events.len(),
            4,
            "expected 4 batch progress events for 51 items, got {}: {events:?}",
            events.len()
        );
        // Expected sequence: 25/51 and 50/51 while embedding, then
        // 51/51 "embedded", then 51/51 "inserted".
        let mut progresses: Vec<u64> = Vec::with_capacity(events.len());
        for ev in &events {
            // A missing or non-integer progress field is recorded as 0.
            progresses.push(ev.data["params"]["progress"].as_u64().unwrap_or(0));
        }
        assert_eq!(progresses, vec![25, 50, 51, 51]);
        let final_event = events.last().unwrap();
        assert_eq!(final_event.data["params"]["message"], json!("inserted"));
        for ev in &events {
            let params = &ev.data["params"];
            assert_eq!(params["progressToken"], json!("batch"));
            assert_eq!(params["total"], json!(51));
        }
    });
    h.shutdown(&runtime);
}
3776
/// v0.11.0 P3: a batch of 50 items or fewer publishes no progress
/// events even though a reporter is wired. Wire-overhead gating per
/// Decision C.
#[test]
fn remember_batch_emits_no_progress_when_size_below_threshold() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        let session = fresh_progress_session();
        let mut rx = session.subscribe_events();
        let token = ProgressToken(json!("t"));
        let reporter = ProgressReporter::new(session.clone(), token);
        // Five items is far under the threshold.
        let mut items: Vec<serde_json::Value> = Vec::new();
        for i in 0..5 {
            items.push(json!({ "content": format!("small-{i}") }));
        }
        let args = json!({ "items": items });
        let _response = h
            .server
            .dispatch_tool("memory_remember_batch", args, Some(reporter))
            .await
            .expect("batch succeeds");
        let events = drain_progress_events(&mut rx);
        assert!(
            events.is_empty(),
            "expected no progress events for 5-item batch, got {events:?}"
        );
    });
    h.shutdown(&runtime);
}
3808
/// v0.11.0 P3: a stdio-style call (no session, hence no progress
/// reporter) must neither panic nor produce events. This pins the
/// backward-compat invariant the rmcp `call_tool` path relies on.
/// `memory_search_docs` is used because it needs no embedder in the
/// dispatch_tests harness; the matching ingest_document "no progress"
/// guarantee is asserted in `http::handler_tests` through the same
/// `None` path.
#[test]
fn stdio_transport_does_not_emit_progress_events() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        // This session exists only to provide a receiver. The tool call
        // is given `None`, so nothing may arrive on it.
        let session = fresh_progress_session();
        let mut rx = session.subscribe_events();
        // A limit of 200 is above the threshold, so progress WOULD fire
        // if a reporter were wired.
        let args = json!({ "query": "anything", "limit": 200 });
        let _response = h
            .server
            .dispatch_tool("memory_search_docs", args, None) // stdio: no reporter
            .await
            .expect("search succeeds without reporter");
        let events = drain_progress_events(&mut rx);
        assert!(
            events.is_empty(),
            "stdio path (no reporter) must not publish to ANY session: {events:?}"
        );
    });
    h.shutdown(&runtime);
}
3844
/// v0.11.0 P3: event ids emitted on one session are strictly
/// increasing across multiple tool calls. Pinned to surface any
/// regression in `SessionState::publish_event`'s id allocator.
///
/// Both tool calls must succeed: a failed dispatch is reported
/// directly instead of surfacing later as a confusing "too few events"
/// failure.
#[test]
fn progress_event_id_monotonic_per_session() {
    let runtime = rt();
    let h = Harness::new(&runtime);
    runtime.block_on(async {
        let session = fresh_progress_session();
        let mut rx = session.subscribe_events();
        // Two reporters with different tokens share the same session.
        let r1 = ProgressReporter::new(session.clone(), ProgressToken(json!("a")));
        let r2 = ProgressReporter::new(session.clone(), ProgressToken(json!("b")));
        // Two sequential calls, each above the progress threshold. The
        // ids must keep increasing from the first call into the second.
        let _first = h
            .server
            .dispatch_tool(
                "memory_search_docs",
                json!({ "query": "q1", "limit": 150 }),
                Some(r1),
            )
            .await
            .expect("first search succeeds");
        let _second = h
            .server
            .dispatch_tool(
                "memory_search_docs",
                json!({ "query": "q2", "limit": 150 }),
                Some(r2),
            )
            .await
            .expect("second search succeeds");
        let events = drain_progress_events(&mut rx);
        assert!(events.len() >= 6, "expected at least 6 events: {events:?}");
        let ids: Vec<u64> = events.iter().map(|e| e.id).collect();
        // Every adjacent pair must be strictly ascending.
        for w in ids.windows(2) {
            assert!(w[0] < w[1], "event ids must be strictly monotonic: {ids:?}");
        }
    });
    h.shutdown(&runtime);
}
3884}
3885
3886// ===========================================================================
3887// v0.8.1 P2: MCP audit principal extraction
3888// ===========================================================================
3889//
3890// These tests live in their own module because they manipulate the
3891// `SOLO_MCP_PRINCIPAL_TOKEN` env var, which is process-global mutable
3892// state. Serialised via a static `Mutex` so cargo test's multi-threaded
3893// runner doesn't race. Pattern mirrors the env-guard discipline in
3894// `solo_cli::commands::common::ollama_overrides_tests`.
3895
#[cfg(test)]
mod principal_extraction_tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard};

    /// Serialises the tests in this module, all of which mutate
    /// `SOLO_MCP_PRINCIPAL_TOKEN`.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Acquires `ENV_LOCK`. A poisoned lock is recovered via
    /// `into_inner` so one panicking test doesn't sink the rest of the
    /// suite.
    fn lock_env() -> MutexGuard<'static, ()> {
        ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// RAII guard for one test's view of the env var.
    ///
    /// The guard owns the `ENV_LOCK` guard, so the variable cannot be
    /// set, cleared, or restored without the lock being held. The
    /// "caller holds ENV_LOCK" precondition of the `unsafe` env calls is
    /// thereby enforced by construction rather than by convention. On
    /// drop (including during a panic unwind) the variable is removed
    /// first and the lock released afterwards.
    struct EnvGuard {
        _lock: MutexGuard<'static, ()>,
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            // SAFETY: `self._lock` is still held here (fields are dropped
            // only after this body returns), so no other test in this
            // module is touching the variable.
            unsafe { std::env::remove_var(ENV_MCP_PRINCIPAL_TOKEN) };
        }
    }

    /// Locks the environment and sets the principal variable to `val`.
    fn set_principal_env(val: &str) -> EnvGuard {
        let lock = lock_env();
        // SAFETY: ENV_LOCK is held via `lock`, which moves into the guard.
        unsafe { std::env::set_var(ENV_MCP_PRINCIPAL_TOKEN, val) };
        EnvGuard { _lock: lock }
    }

    /// Locks the environment and makes sure the principal variable is
    /// absent.
    fn clear_principal_env() -> EnvGuard {
        let lock = lock_env();
        // SAFETY: ENV_LOCK is held via `lock`, which moves into the guard.
        unsafe { std::env::remove_var(ENV_MCP_PRINCIPAL_TOKEN) };
        EnvGuard { _lock: lock }
    }

    /// Stdio path: setting `SOLO_MCP_PRINCIPAL_TOKEN` produces a
    /// non-None principal.
    #[test]
    fn stdio_env_var_resolves_to_principal() {
        let _env = set_principal_env("alice-token");
        let resolved = resolve_mcp_principal(None);
        assert_eq!(resolved.as_deref(), Some("alice-token"));
    }

    /// Stdio path: absent env var ⇒ `None` (regression — must preserve
    /// v0.8.0 behaviour for users without auth).
    #[test]
    fn stdio_no_env_var_resolves_to_none() {
        let _env = clear_principal_env();
        assert_eq!(resolve_mcp_principal(None), None);
    }

    /// Stdio path: whitespace-only env var ⇒ `None` (don't pin every
    /// audit row to an empty/blank principal because of a launcher
    /// typo).
    #[test]
    fn stdio_whitespace_env_var_resolves_to_none() {
        let _env = set_principal_env("   \t  ");
        assert_eq!(resolve_mcp_principal(None), None);
    }

    /// HTTP-MCP path: an `Authorization: Bearer <token>` header resolves
    /// to the token as principal.
    #[test]
    fn http_header_resolves_to_bearer_token_principal() {
        let _env = clear_principal_env();
        let resolved = resolve_mcp_principal(Some("Bearer api-token-xyz"));
        assert_eq!(resolved.as_deref(), Some("api-token-xyz"));
    }

    /// Precedence: when both env var AND header carry a token, the
    /// header wins (consistent with the rest of the auth stack — JWT
    /// claim beats `X-Solo-Tenant` header).
    #[test]
    fn http_header_beats_env_var() {
        let _env = set_principal_env("env-token");
        let resolved = resolve_mcp_principal(Some("Bearer header-token"));
        assert_eq!(
            resolved.as_deref(),
            Some("header-token"),
            "header MUST win over env var per documented precedence"
        );
    }

    /// HTTP-MCP path: malformed header (no `Bearer ` prefix) ⇒ falls
    /// through to the env-var path.
    #[test]
    fn http_malformed_header_falls_through_to_env() {
        let _env = set_principal_env("env-fallback");
        let resolved = resolve_mcp_principal(Some("Basic dXNlcjpwYXNz"));
        assert_eq!(resolved.as_deref(), Some("env-fallback"));
    }

    /// HTTP-MCP path: empty bearer header (`Bearer ` with no token)
    /// falls through to the env-var path. Matches the spirit of the
    /// whitespace-env-var rejection — don't credit a half-formed
    /// header.
    #[test]
    fn http_empty_bearer_header_falls_through_to_env() {
        let _env = set_principal_env("env-fallback");
        let resolved = resolve_mcp_principal(Some("Bearer   "));
        assert_eq!(resolved.as_deref(), Some("env-fallback"));
    }

    /// Across N consecutive calls of `resolve_mcp_principal`, the
    /// resolved principal is stable for the same env-var setting
    /// (regression guard: an accidental thread-local cache would
    /// break the "stable across N tool calls in one session" contract
    /// the brief calls out).
    #[test]
    fn stable_across_multiple_resolutions() {
        let _env = set_principal_env("stable-token");
        for _ in 0..5 {
            assert_eq!(resolve_mcp_principal(None).as_deref(), Some("stable-token"));
        }
    }
}
4018
/// v0.9.0 P2 tests for the LLM-config gate applied at MCP initialize
/// time.
///
/// These are pure-function tests of [`initialize_decision`]. No rmcp
/// Peer is built (its constructors are private) and no MCP handshake is
/// driven. The wiring between `initialize_decision` and the side-effect
/// path lives in [`SoloMcpServer::initialize`]; it is covered
/// indirectly by the audit-row tests in
/// [`crate::llm::sampling::tests`], which exercise the same
/// `SamplingLlmClient` + `WriteCommand::EmitLlmSamplingAudit` path that
/// `populate_sampling_steward` constructs.
#[cfg(test)]
mod initialize_decision_tests {
    use super::*;
    use solo_storage::LlmSettings;

    /// Asserts that `settings` yields `Allow` both without and with the
    /// peer's sampling capability, checked in that order.
    #[track_caller]
    fn assert_allowed_either_way(settings: &Option<LlmSettings>) {
        for peer_has_sampling in [false, true] {
            assert_eq!(
                initialize_decision(settings, peer_has_sampling),
                InitializeDecision::Allow
            );
        }
    }

    /// `[llm]` absent → always Allow (matches v0.8.x behaviour).
    #[test]
    fn no_llm_block_allows_initialize_regardless_of_sampling_capability() {
        assert_allowed_either_way(&None);
    }

    /// `[llm] mode = "none"` → always Allow.
    #[test]
    fn llm_none_allows_initialize_regardless_of_sampling_capability() {
        assert_allowed_either_way(&Some(LlmSettings::None));
    }

    /// `[llm] mode = "anthropic"` → always Allow.
    #[test]
    fn llm_anthropic_allows_initialize_regardless_of_sampling_capability() {
        let anthropic = LlmSettings::Anthropic {
            api_key_env: "ANTHROPIC_API_KEY".into(),
            model: "claude-sonnet-4-6".into(),
        };
        assert_allowed_either_way(&Some(anthropic));
    }

    /// `[llm] mode = "ollama"` → always Allow.
    #[test]
    fn llm_ollama_allows_initialize_regardless_of_sampling_capability() {
        let ollama = LlmSettings::Ollama {
            base_url: "http://localhost:11434".into(),
            model: "qwen3-coder:30b".into(),
        };
        assert_allowed_either_way(&Some(ollama));
    }

    /// `[llm] mode = "mcp_sampling"` with a sampling-capable peer →
    /// populate the slot.
    #[test]
    fn llm_mcp_sampling_with_sampling_capability_populates_slot() {
        let settings = Some(LlmSettings::McpSampling);
        let decision = initialize_decision(&settings, true);
        assert_eq!(decision, InitializeDecision::PopulateSamplingSteward);
    }

    /// `[llm] mode = "mcp_sampling"` with a peer that LACKS the sampling
    /// capability → reject initialize with the locked BLOCKER 2 error.
    #[test]
    fn llm_mcp_sampling_without_sampling_capability_rejects() {
        let settings = Some(LlmSettings::McpSampling);
        let decision = initialize_decision(&settings, false);
        assert_eq!(
            decision,
            InitializeDecision::RejectMissingSamplingCapability
        );
    }

    /// The locked BLOCKER 2 error message must keep carrying these
    /// exact substrings, so a future audit revision can grep for them
    /// and confirm they still land.
    #[test]
    fn sampling_capability_missing_error_message_contains_all_alternatives() {
        let msg = sampling_capability_missing_error_message();
        // Banner.
        assert!(msg.contains("LLM backend `mcp_sampling`"));
        // Alternative 1: anthropic.
        assert!(msg.contains("mode = \"anthropic\""));
        assert!(msg.contains("api_key_env = \"ANTHROPIC_API_KEY\""));
        // Alternative 2: openai.
        assert!(msg.contains("mode = \"openai\""));
        assert!(msg.contains("api_key_env = \"OPENAI_API_KEY\""));
        // Alternative 3: ollama.
        assert!(msg.contains("mode = \"ollama\""));
        assert!(msg.contains("base_url = \"http://localhost:11434\""));
        // Alternative 4: none.
        assert!(msg.contains("mode = \"none\""));
        // Footer pointer at the release-prep doc.
        assert!(msg.contains("docs/releases/v0.9.0.md"));
    }
}
4112
4113// fetch_recall_rows + RecallHit + RecallRow used to live here. Recall
4114// pipeline moved to solo_query::recall in commit (consolidate-recall);
4115// transports just call solo_query::run_recall and format the result.