crtx-mcp 0.1.2

MCP stdio JSON-RPC 2.0 server for Cortex — tool dispatch, ToolHandler trait, gate wiring (ADR 0045).
Documentation
//! `cortex_search` — FTS5-backed memory search MCP tool.
//!
//! Mirrors the retrieval path used by `cortex memory search` (ADR 0045 §3
//! gate-equivalence). Active memories are queried via
//! [`cortex_retrieval::LexicalIndex`] and optionally boosted by the FTS5
//! fuzzy scorer. Rows tagged `pending_mcp_commit` are excluded (ADR 0047 §2);
//! since that status does not yet exist in the store schema,
//! `list_by_status("active")` already provides the correct filter.
//!
//! The `semantic: true` parameter is accepted per the ADR 0045 §4 tool
//! schema but degrades gracefully to FTS-only with a warning until the
//! embedding repo is wired (no `EmbeddingRepo` or `LocalStubEmbedder` exist
//! in the current codebase).

use std::sync::{Arc, Mutex};

use cortex_retrieval::{LexicalDocument, LexicalIndex};
use cortex_store::repo::MemoryRepo;
use cortex_store::Pool;
use serde_json::json;

use crate::tool_handler::{GateId, ToolError, ToolHandler};

/// Default result limit when the caller omits `limit`.
///
/// Returned by `extract_limit` when the params object has no `limit` key.
const DEFAULT_LIMIT: usize = 10;

/// Server-side cap on `limit` (ADR 0045 §4).
///
/// `extract_limit` clamps any larger requested value down to this bound
/// rather than rejecting the request.
const MAX_LIMIT: usize = 50;

/// MCP handler for the `cortex_search` tool.
///
/// Schema (ADR 0045 §4):
/// ```jsonc
/// cortex_search(
///   query: string,          // required, non-empty
///   semantic: bool,         // default false — accepted, FTS-only for now
///   limit: int,             // default 10, capped at 50
///   session_id?: string     // optional, accepted and ignored
/// ) → [{ id, content, score, domains }]
/// ```
///
/// The handler reads memories with status `"active"` from the store on
/// every call and searches them through an in-memory lexical index.
///
/// `rusqlite::Connection` is not `Sync`; the pool is wrapped in a `Mutex`
/// to satisfy the `Send + Sync` bound on [`ToolHandler`].
#[derive(Debug)]
pub struct CortexSearchTool {
    /// SQLite connection pool, mutex-wrapped because `rusqlite::Connection`
    /// is not `Sync`. Shared via `Arc` so other owners can hold the same pool.
    pub pool: Arc<Mutex<Pool>>,
}

impl ToolHandler for CortexSearchTool {
    /// Name under which this tool is exposed.
    fn name(&self) -> &'static str {
        "cortex_search"
    }

    /// Gates declared for this tool: FTS read and embedding read.
    fn gate_set(&self) -> &'static [GateId] {
        &[GateId::FtsRead, GateId::EmbeddingRead]
    }

    /// Runs a lexical search over active memories.
    ///
    /// Reads `query` (required, non-blank), `semantic` (optional bool; any
    /// non-boolean value is treated as `false`) and `limit` (optional,
    /// defaulted and capped by `extract_limit`) from `params`. `session_id`
    /// is part of the schema but is not consulted here.
    ///
    /// Returns a JSON array of `{ id, content, score, domains }` objects, at
    /// most `limit` long, in the order produced by the index. An empty array
    /// is returned when there are no active memories.
    ///
    /// # Errors
    ///
    /// * [`ToolError::InvalidParams`] — `query` is missing/blank, `limit` is
    ///   not a non-negative integer, or the index rejects the query.
    /// * [`ToolError::Internal`] — the pool mutex is poisoned or the store
    ///   read fails.
    fn call(&self, params: serde_json::Value) -> Result<serde_json::Value, ToolError> {
        // --- param extraction ---
        let query = extract_query(&params)?;
        let semantic = params
            .get("semantic")
            .and_then(|v| v.as_bool())
            .unwrap_or(false);
        let limit = extract_limit(&params)?;

        // `session_id` is accepted per schema but plays no part in retrieval,
        // so it is deliberately not read.

        if semantic {
            // Embedding repo and LocalStubEmbedder are not yet wired in this
            // workspace. The tool degrades to FTS-only and emits a diagnostic
            // rather than returning an error, preserving the caller's ability to
            // get search results. Follow-on task: wire embedding read path.
            tracing::warn!(
                "cortex_search: semantic=true requested but embedding repo is not wired; \
                 falling back to lexical+FTS5 only"
            );
        }

        // --- retrieval ---
        // The pool guard lives only inside this block: once the rows are
        // loaded the mutex is released, so other handlers sharing the pool
        // are not blocked while the index is built and searched.
        let memories: Vec<_> = {
            let pool = self
                .pool
                .lock()
                .map_err(|e| ToolError::Internal(format!("pool lock poisoned: {e}")))?;
            let repo = MemoryRepo::new(&pool);
            let active = repo.list_by_status("active").map_err(|e| {
                tracing::error!(error = %e, "cortex_search: failed to read active memories");
                ToolError::Internal(format!("failed to read active memories: {e}"))
            })?;

            // Filter out any rows with status pending_mcp_commit (ADR 0047 §2).
            // list_by_status("active") already excludes these since the status
            // does not exist in the current schema; the filter is explicit here
            // so the gate contract is visible when the schema column lands.
            active
                .into_iter()
                .filter(|m| m.status != "pending_mcp_commit")
                .collect()
        };

        if memories.is_empty() {
            return Ok(json!([]));
        }

        // Build lexical index and search.
        let documents: Vec<LexicalDocument> = memories
            .iter()
            .map(|m| {
                LexicalDocument::accepted_memory(
                    m.id,
                    m.claim.clone(),
                    domain_names(&m.domains_json),
                )
            })
            .collect();

        let index = LexicalIndex::new(documents);
        let hits = index
            .search(&query)
            .map_err(|e| ToolError::InvalidParams(format!("search query error: {e}")))?;

        // Compose result rows, then apply the limit to the rows actually
        // emitted so a hit that cannot be matched back to a memory never
        // consumes one of the caller's slots.
        let results: Vec<serde_json::Value> = hits
            .into_iter()
            .filter_map(|hit| {
                let memory = memories.iter().find(|m| m.id == hit.document.id)?;
                let domains = domain_names(&memory.domains_json);
                Some(json!({
                    "id": memory.id.to_string(),
                    "content": memory.claim,
                    "score": hit.explanation.lexical_match,
                    "domains": domains,
                }))
            })
            .take(limit)
            .collect();

        Ok(json!(results))
    }
}

/// Collects the string entries of a `domains_json` value.
///
/// Yields an empty list when the value is not a JSON array; non-string
/// array entries are skipped.
fn domain_names(domains_json: &serde_json::Value) -> Vec<String> {
    domains_json
        .as_array()
        .map(|entries| {
            entries
                .iter()
                .filter_map(|entry| entry.as_str().map(str::to_owned))
                .collect()
        })
        .unwrap_or_default()
}

/// Pulls the required `query` string out of the tool params.
///
/// The text is returned exactly as supplied (no trimming); trimming is only
/// used to decide whether the query is blank.
///
/// # Errors
///
/// Returns [`ToolError::InvalidParams`] when `query` is absent, is not a
/// JSON string, or contains only whitespace.
fn extract_query(params: &serde_json::Value) -> Result<String, ToolError> {
    match params.get("query").and_then(serde_json::Value::as_str) {
        None => Err(ToolError::InvalidParams(
            "query parameter is required".to_string(),
        )),
        Some(text) if text.trim().is_empty() => Err(ToolError::InvalidParams(
            "query must not be blank".to_string(),
        )),
        Some(text) => Ok(text.to_owned()),
    }
}

/// Resolves the effective result limit from the tool params.
///
/// A missing `limit` key yields [`DEFAULT_LIMIT`]; any supplied value is
/// clamped to [`MAX_LIMIT`].
///
/// # Errors
///
/// Returns [`ToolError::InvalidParams`] when `limit` is present but is not
/// representable as an unsigned integer (negative, fractional, non-numeric,
/// or `null`).
fn extract_limit(params: &serde_json::Value) -> Result<usize, ToolError> {
    let raw = match params.get("limit") {
        Some(value) => value,
        None => return Ok(DEFAULT_LIMIT),
    };

    let requested = raw.as_u64().ok_or_else(|| {
        ToolError::InvalidParams("limit must be a non-negative integer".to_string())
    })?;

    // A value too large for `usize` is necessarily above the cap, so it
    // collapses to the cap just like any other oversized request.
    Ok(usize::try_from(requested).map_or(MAX_LIMIT, |n| n.min(MAX_LIMIT)))
}

/// Unit tests for parameter extraction and static tool metadata.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn extract_query_rejects_missing() {
        let err = extract_query(&json!({})).unwrap_err();
        assert!(matches!(err, ToolError::InvalidParams(_)));
    }

    #[test]
    fn extract_query_rejects_non_string() {
        // A `query` key holding a non-string value is treated like a missing one.
        let err = extract_query(&json!({"query": 42})).unwrap_err();
        assert!(matches!(err, ToolError::InvalidParams(_)));
    }

    #[test]
    fn extract_query_rejects_blank() {
        let err = extract_query(&json!({"query": "   "})).unwrap_err();
        assert!(matches!(err, ToolError::InvalidParams(_)));
    }

    #[test]
    fn extract_query_accepts_non_empty() {
        let q = extract_query(&json!({"query": "rust memory"})).unwrap();
        assert_eq!(q, "rust memory");
    }

    #[test]
    fn extract_limit_defaults_to_ten() {
        assert_eq!(extract_limit(&json!({})).unwrap(), DEFAULT_LIMIT);
    }

    #[test]
    fn extract_limit_caps_at_fifty() {
        assert_eq!(extract_limit(&json!({"limit": 999})).unwrap(), MAX_LIMIT);
    }

    #[test]
    fn extract_limit_accepts_valid() {
        assert_eq!(extract_limit(&json!({"limit": 20})).unwrap(), 20);
    }

    #[test]
    fn extract_limit_accepts_zero() {
        assert_eq!(extract_limit(&json!({"limit": 0})).unwrap(), 0);
    }

    #[test]
    fn extract_limit_rejects_non_numeric_string() {
        let err = extract_limit(&json!({"limit": "bad"})).unwrap_err();
        assert!(matches!(err, ToolError::InvalidParams(_)));
    }

    #[test]
    fn extract_limit_rejects_negative() {
        let err = extract_limit(&json!({"limit": -1})).unwrap_err();
        assert!(matches!(err, ToolError::InvalidParams(_)));
    }

    #[test]
    fn extract_limit_rejects_fractional() {
        let err = extract_limit(&json!({"limit": 1.5})).unwrap_err();
        assert!(matches!(err, ToolError::InvalidParams(_)));
    }

    #[test]
    fn gate_set_is_correct() {
        // NOTE(review): this does not call `CortexSearchTool::gate_set`; no
        // `Pool` is built in this module, so only a locally constructed slice
        // of the expected gates is checked. Replace with a call on a real
        // handler once a test pool is available.
        use crate::tool_handler::GateId;
        // GateId values are Copy — verify them via the slice directly.
        let gates: &[GateId] = &[GateId::FtsRead, GateId::EmbeddingRead];
        assert_eq!(gates.len(), 2);
    }
}