//! Model Context Protocol (MCP) server exposing Lantern as tools.
//!
//! Thin adapter over the existing sync modules: each tool opens a fresh
//! store, calls the same function the CLI would, and returns the result as
//! JSON text. The heavy lifting (SQLite, reqwest) stays synchronous and
//! runs inside `spawn_blocking` so it never stalls the tokio reactor.
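//!
//! A minimal launch sketch (the `lantern::mcp` path is assumed for
//! illustration; `run` at the bottom of this module is the real entry point):
//!
//! ```ignore
//! use std::path::PathBuf;
//!
//! fn main() -> anyhow::Result<()> {
//!     // `None` = serve over stdio, the way an MCP host spawns the binary.
//!     lantern::mcp::run(PathBuf::from(".lantern"), None)
//! }
//! ```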

use std::path::PathBuf;
use std::sync::Arc;

use anyhow::Result;
use rmcp::{
    ErrorData as McpError, ServerHandler, ServiceExt,
    handler::server::{router::tool::ToolRouter, wrapper::Parameters},
    model::{CallToolResult, Content, Implementation, ServerCapabilities, ServerInfo},
    schemars, tool, tool_handler, tool_router,
    transport::stdio,
};
use serde::Deserialize;

use crate::embed::{self, EmbedOptions, EmbeddingBackendFactory, OllamaBackendFactory};
use crate::feedback::{self, FeedbackReport};
use crate::forget;
use crate::ingest;
use crate::inspect;
use crate::search::{self, SearchOptions, SemanticOptions};
use crate::show;
use crate::store::Store;

#[derive(Clone)]
pub struct LanternServer {
    store_dir: PathBuf,
    embed_factory: Arc<dyn EmbeddingBackendFactory>,
    // Populated by the #[tool_router] macro via `Self::tool_router()` and
    // read through the Clone impl by rmcp's dispatcher, so rustc's
    // dead-code analysis never sees a direct use.
    #[allow(dead_code)]
    tool_router: ToolRouter<LanternServer>,
}

#[derive(Debug, Deserialize, schemars::JsonSchema)]
pub struct IngestArgs {
    /// File or directory to ingest.
    pub path: String,
    /// Override the store directory (default: the server's configured store).
    #[serde(default)]
    pub store: Option<String>,
    /// Bypass `.lantern-ignore` rules (and built-in defaults).
    #[serde(default)]
    pub no_ignore: Option<bool>,
}

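/// Arguments for `lantern_search`. A sketch of a hybrid-mode request,
/// deserialized from the same JSON an MCP client would send (values are
/// illustrative; omitted fields take their serde defaults):
///
/// ```ignore
/// let args: SearchArgs = serde_json::from_str(
///     r#"{ "query": "tokio blocking", "mode": "hybrid", "limit": 5 }"#,
/// )?;
/// ```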
#[derive(Debug, Deserialize, schemars::JsonSchema)]
pub struct SearchArgs {
    /// Query string (whitespace-delimited tokens, implicit AND for keyword mode).
    pub query: String,
    /// Maximum number of hits to return (default 10).
    #[serde(default)]
    pub limit: Option<usize>,
    /// Exact match on the source `kind` (e.g. `text/markdown`).
    #[serde(default)]
    pub kind: Option<String>,
    /// Substring that must appear in the source path or URI.
    #[serde(default)]
    pub path: Option<String>,
    /// Search mode: `keyword` (default), `semantic`, or `hybrid`.
    #[serde(default)]
    pub mode: Option<String>,
    /// Ollama embedding model (semantic / hybrid mode).
    #[serde(default)]
    pub model: Option<String>,
    /// Query-side instruction override for supported embedding models.
    #[serde(default)]
    pub instruction: Option<String>,
    /// Ollama base URL (semantic / hybrid mode).
    #[serde(default)]
    pub ollama_url: Option<String>,
}

#[derive(Debug, Deserialize, schemars::JsonSchema)]
pub struct ShowArgs {
    /// Source id (full 32-character hex value or any unambiguous prefix).
    pub id: String,
}

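/// Arguments for `lantern_forget`. A sketch of the dry-run default (the
/// pattern value is illustrative):
///
/// ```ignore
/// let args: ForgetArgs = serde_json::from_str(r#"{ "pattern": "scratch/" }"#)?;
/// assert_eq!(args.apply, None); // no `apply` => dry-run, nothing deleted
/// ```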
#[derive(Debug, Deserialize, schemars::JsonSchema)]
pub struct ForgetArgs {
    /// Substring to match against source path or URI. Minimum 3 characters.
    pub pattern: String,
    /// If true, actually delete matching sources. Defaults to false (dry-run),
    /// which returns matches without modifying the store. The conservative
    /// default protects LLM callers from destructive accidents.
    #[serde(default)]
    pub apply: Option<bool>,
}

#[derive(Debug, Deserialize, schemars::JsonSchema)]
pub struct InspectArgs {
    /// Maximum number of recent sources to list (default 10).
    #[serde(default)]
    pub limit: Option<usize>,
}

#[derive(Debug, Deserialize, schemars::JsonSchema)]
pub struct EmbedArgs {
    /// Ollama embedding model (must be pulled locally).
    #[serde(default)]
    pub model: Option<String>,
    /// Ollama base URL.
    #[serde(default)]
    pub ollama_url: Option<String>,
    /// Stop after embedding this many chunks.
    #[serde(default)]
    pub limit: Option<usize>,
}

#[derive(Debug, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum FeedbackVote {
    Up,
    Down,
}

impl FeedbackVote {
    fn as_feedback(self) -> feedback::Feedback {
        match self {
            FeedbackVote::Up => feedback::Feedback::Up,
            FeedbackVote::Down => feedback::Feedback::Down,
        }
    }
}

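/// Arguments for `lantern_feedback`. A sketch of the wire shape; note the
/// lowercase vote variant (the id value is illustrative):
///
/// ```ignore
/// let args: FeedbackArgs = serde_json::from_str(
///     r#"{ "chunk_id": "0123456789abcdef0123456789abcdef", "vote": "down" }"#,
/// )?;
/// ```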
#[derive(Debug, Deserialize, schemars::JsonSchema)]
pub struct FeedbackArgs {
    /// Chunk id (full 32-character hex value from search output).
    pub chunk_id: String,
    /// Vote direction.
    pub vote: FeedbackVote,
}

#[tool_router]
impl LanternServer {
    pub fn new(store_dir: PathBuf) -> Self {
        Self::with_factory(store_dir, Arc::new(OllamaBackendFactory))
    }

    /// Construct a server with a caller-supplied embedding factory.
    /// Used by tests to inject a deterministic mock backend.
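    ///
    /// Sketch (the `MockFactory` type is hypothetical; anything implementing
    /// `EmbeddingBackendFactory` works):
    ///
    /// ```ignore
    /// let srv = LanternServer::with_factory(dir.clone(), Arc::new(MockFactory::default()));
    /// ```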
    pub fn with_factory(
        store_dir: PathBuf,
        embed_factory: Arc<dyn EmbeddingBackendFactory>,
    ) -> Self {
        Self {
            store_dir,
            embed_factory,
            tool_router: Self::tool_router(),
        }
    }

    #[tool(
        description = "Ingest a file or directory into the Lantern store. Supports text, markdown, JSONL transcripts, and common source code extensions. Re-ingesting unchanged content is a no-op."
    )]
    async fn lantern_ingest(
        &self,
        Parameters(args): Parameters<IngestArgs>,
    ) -> Result<CallToolResult, McpError> {
        let store_dir = args
            .store
            .map(PathBuf::from)
            .unwrap_or_else(|| self.store_dir.clone());
        let path = PathBuf::from(args.path);
        let no_ignore = args.no_ignore.unwrap_or(false);
        run_blocking(move || {
            let mut store = Store::open(&store_dir)?;
            ingest::ingest_path_with(&mut store, &path, &ingest::IngestOptions { no_ignore })
        })
        .await
        .and_then(|r| json_content(&r))
    }

    #[tool(
        description = "Keyword (FTS5), semantic (cosine over stored embeddings), or hybrid search over ingested chunks. Returns ranked hits with full provenance."
    )]
    async fn lantern_search(
        &self,
        Parameters(args): Parameters<SearchArgs>,
    ) -> Result<CallToolResult, McpError> {
        let srv = self.clone();
        run_blocking(move || srv.search_sync(args))
            .await
            .and_then(|v| json_content(&v))
    }

    #[tool(
        description = "Show full provenance and all chunk text for a single source. Accepts the full source id or any unambiguous prefix."
    )]
    async fn lantern_show(
        &self,
        Parameters(args): Parameters<ShowArgs>,
    ) -> Result<CallToolResult, McpError> {
        let store_dir = self.store_dir.clone();
        let id = args.id;
        run_blocking(move || {
            let store = Store::open(&store_dir)?;
            show::show(&store, &id)
        })
        .await
        .and_then(|s| json_content(&s))
    }

    #[tool(
        description = "Remove indexed sources (and their chunks) whose path or URI contains the given substring. Requires at least 3 characters. Defaults to dry-run; pass apply=true to actually delete."
    )]
    async fn lantern_forget(
        &self,
        Parameters(args): Parameters<ForgetArgs>,
    ) -> Result<CallToolResult, McpError> {
        let store_dir = self.store_dir.clone();
        let pattern = args.pattern;
        let apply = args.apply.unwrap_or(false);
        run_blocking(move || {
            let mut store = Store::open(&store_dir)?;
            forget::forget(&mut store, &pattern, apply)
        })
        .await
        .and_then(|r| json_content(&r))
    }

    #[tool(
        description = "Report store status: schema version, on-disk size, source/chunk/embedding counts, and the most recently ingested sources."
    )]
    async fn lantern_inspect(
        &self,
        Parameters(args): Parameters<InspectArgs>,
    ) -> Result<CallToolResult, McpError> {
        let store_dir = self.store_dir.clone();
        let recent_limit = args.limit.unwrap_or(10);
        run_blocking(move || {
            let store = Store::open(&store_dir)?;
            inspect::inspect(&store, inspect::InspectOptions { recent_limit })
        })
        .await
        .and_then(|r| json_content(&r))
    }

    #[tool(
        description = "Embed every chunk that doesn't yet have a vector for the requested model. Requires a running local Ollama daemon."
    )]
    async fn lantern_embed(
        &self,
        Parameters(args): Parameters<EmbedArgs>,
    ) -> Result<CallToolResult, McpError> {
        let srv = self.clone();
        run_blocking(move || srv.embed_sync(args))
            .await
            .and_then(|r| json_content(&r))
    }

    #[tool(
        description = "Record thumbs-up or thumbs-down feedback for a chunk and return the updated net score. Feedback is additive across repeated calls."
    )]
    async fn lantern_feedback(
        &self,
        Parameters(args): Parameters<FeedbackArgs>,
    ) -> Result<CallToolResult, McpError> {
        let srv = self.clone();
        run_blocking(move || srv.feedback_sync(args))
            .await
            .and_then(|r| json_content(&r))
    }

    /// Synchronous core of `lantern_search`. Exposed so tests can exercise the
    /// exact MCP code path (including backend-factory dispatch) without
    /// standing up a tokio runtime or stdio transport.
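    ///
    /// A keyword-mode sketch (store path and query are illustrative):
    ///
    /// ```ignore
    /// let srv = LanternServer::new(PathBuf::from(".lantern"));
    /// let out = srv.search_sync(SearchArgs {
    ///     query: "retry backoff".into(),
    ///     limit: Some(5),
    ///     kind: None,
    ///     path: None,
    ///     mode: None, // defaults to "keyword"
    ///     model: None,
    ///     instruction: None,
    ///     ollama_url: None,
    /// })?;
    /// assert!(out["results"].is_array());
    /// ```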
    pub fn search_sync(&self, args: SearchArgs) -> Result<serde_json::Value> {
        let store = Store::open(&self.store_dir)?;
        let mode = args
            .mode
            .as_deref()
            .unwrap_or("keyword")
            .to_ascii_lowercase();
        let limit = args.limit.unwrap_or(10);
        let model = args
            .model
            .unwrap_or_else(|| embed::DEFAULT_EMBED_MODEL.to_string());
        let ollama_url = args
            .ollama_url
            .unwrap_or_else(|| embed::DEFAULT_OLLAMA_URL.to_string());
        let instruction = args.instruction;
        let kind = args.kind;
        let path_contains = args.path;
        let query = args.query;

        let hits = match mode.as_str() {
            "semantic" => {
                let backend = self.embed_factory.build(&model, &ollama_url)?;
                search::semantic_search_with(
                    &store,
                    &query,
                    &SemanticOptions {
                        limit,
                        kind,
                        path_contains,
                        model,
                        ollama_url,
                        instruction,
                    },
                    &*backend,
                )?
            }
            "hybrid" => {
                let backend = self.embed_factory.build(&model, &ollama_url)?;
                search::hybrid_search_with(
                    &store,
                    &query,
                    &SemanticOptions {
                        limit,
                        kind,
                        path_contains,
                        model,
                        ollama_url,
                        instruction,
                    },
                    &*backend,
                )?
            }
            "keyword" | "" => search::search(
                &store,
                &query,
                SearchOptions {
                    limit,
                    kind,
                    path_contains,
                },
            )?,
            other => {
                anyhow::bail!("unknown mode {other:?}; expected keyword, semantic, or hybrid")
            }
        };
        Ok(serde_json::json!({ "query": query, "results": hits }))
    }

    /// Synchronous core of `lantern_embed`. Uses the server's injected
    /// factory, so tests can exercise embedding from the MCP path without a
    /// live Ollama daemon.
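    ///
    /// Sketch: `None` fields fall back to `DEFAULT_EMBED_MODEL` and
    /// `DEFAULT_OLLAMA_URL` (the limit value is illustrative):
    ///
    /// ```ignore
    /// let report = srv.embed_sync(EmbedArgs {
    ///     model: None,
    ///     ollama_url: None,
    ///     limit: Some(100),
    /// })?;
    /// ```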
    pub fn embed_sync(&self, args: EmbedArgs) -> Result<embed::EmbedReport> {
        let mut store = Store::open(&self.store_dir)?;
        let model = args
            .model
            .unwrap_or_else(|| embed::DEFAULT_EMBED_MODEL.to_string());
        let ollama_url = args
            .ollama_url
            .unwrap_or_else(|| embed::DEFAULT_OLLAMA_URL.to_string());
        let backend = self.embed_factory.build(&model, &ollama_url)?;
        embed::embed_missing_with(
            &mut store,
            &EmbedOptions {
                model,
                ollama_url,
                limit: args.limit,
            },
            &*backend,
        )
    }

    /// Synchronous core of `lantern_feedback`. Exposed so tests can exercise
    /// the exact MCP code path without needing the transport layer.
    pub fn feedback_sync(&self, args: FeedbackArgs) -> Result<FeedbackReport> {
        let store = Store::open(&self.store_dir)?;
        feedback::apply_feedback(&store, &args.chunk_id, args.vote.as_feedback())
    }
}

#[tool_handler]
impl ServerHandler for LanternServer {
    fn get_info(&self) -> ServerInfo {
        ServerInfo::new(ServerCapabilities::builder().enable_tools().build())
            .with_server_info(Implementation::from_build_env())
            .with_instructions(String::from(
                "Lantern MCP server: provenance-aware local memory for agents. \
                 Tools: lantern_ingest, lantern_search, lantern_show, \
                 lantern_forget, lantern_inspect, lantern_embed, \
                 lantern_feedback.",
            ))
    }
}

/// Start the MCP server. If `port` is `Some`, bind 127.0.0.1 on that TCP
/// port and serve the first connection; otherwise run over stdio.
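///
/// A TCP-mode sketch (the port number is illustrative):
///
/// ```ignore
/// run(PathBuf::from(".lantern"), Some(8913))?;
/// // then point an MCP client at 127.0.0.1:8913
/// ```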
pub fn run(store_dir: PathBuf, port: Option<u16>) -> Result<()> {
    let runtime = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()?;
    runtime.block_on(async move {
        let server = LanternServer::new(store_dir);
        match port {
            None => {
                let service = server.serve(stdio()).await?;
                service.waiting().await?;
            }
            Some(p) => {
                let listener = tokio::net::TcpListener::bind(("127.0.0.1", p)).await?;
                eprintln!("lantern mcp listening on 127.0.0.1:{p}");
                let (stream, peer) = listener.accept().await?;
                eprintln!("lantern mcp client connected from {peer}");
                let service = server.serve(stream).await?;
                service.waiting().await?;
            }
        }
        Ok::<_, anyhow::Error>(())
    })
}

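/// Serialize any value as pretty-printed JSON and wrap it in a successful
/// text tool result.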
fn json_content<T: serde::Serialize>(value: &T) -> Result<CallToolResult, McpError> {
    let text = serde_json::to_string_pretty(value)
        .map_err(|e| McpError::internal_error(format!("serialize result: {e}"), None))?;
    Ok(CallToolResult::success(vec![Content::text(text)]))
}

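/// Run a synchronous closure on tokio's blocking thread pool, mapping both a
/// failed join (panic or cancellation) and the closure's own `anyhow` error
/// to `McpError::internal_error`.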
async fn run_blocking<F, T>(f: F) -> Result<T, McpError>
where
    F: FnOnce() -> anyhow::Result<T> + Send + 'static,
    T: Send + 'static,
{
    tokio::task::spawn_blocking(f)
        .await
        .map_err(|e| McpError::internal_error(format!("spawn_blocking: {e}"), None))?
        .map_err(|e| McpError::internal_error(format!("{e:#}"), None))
}