Skip to main content

rover/mcp/
handler.rs

1//! Shared MCP server state.
2
3use std::sync::Arc;
4
5use rmcp::ErrorData;
6use rmcp::ServerHandler;
7use rmcp::handler::server::router::tool::ToolRouter;
8use rmcp::handler::server::wrapper::{Json, Parameters};
9use rmcp::model::{ServerCapabilities, ServerInfo};
10use rmcp::{tool, tool_handler, tool_router};
11
12use crate::config::Config;
13use crate::fetcher::concurrency::Pacer;
14use crate::fetcher::ssrf::SsrfLevel;
15use crate::mcp::tools::count_tokens::CountTokensArgs;
16use crate::mcp::tools::fetch::{FetchArgs, FetchOutput};
17use crate::storage::Db;
18
19/// State shared across all MCP tool invocations.
20///
21/// Note: this struct no longer carries its own scheduler `Sender`. Every
22/// `storage::tasks::insert` call notifies the scheduler via the `Db`-owned
23/// notifier installed by `mcp::server::serve_stdio`, so the MCP tool layer
24/// has no extra wiring to do — it just inserts.
25#[derive(Clone)]
26pub struct RoverHandler {
27    pub(crate) db: Db,
28    pub(crate) config: Arc<Config>,
29    pub(crate) client: reqwest::Client,
30    pub(crate) ssrf_level: SsrfLevel,
31    /// Pre-canonicalized project root used when `ssrf_level == Project` to
32    /// validate `file://` URLs. `None` for every other level.
33    pub(crate) ssrf_project_root: Option<std::path::PathBuf>,
34    /// Optional HAR recorder shared with background workers. `Some` when
35    /// `[debug] har_path` is set in config.
36    pub(crate) har_recorder: Option<Arc<crate::fetcher::har::HarRecorder>>,
37    pub(crate) pacer: Arc<Pacer>,
38    pub(crate) summarizer: Arc<crate::summarizer::SummarizerService>,
39    /// M9: image captioner registry. Always present in default builds since
40    /// cloud captioners ship in every binary; may be empty when the user
41    /// hasn't configured any `[captioners.*]` blocks.
42    pub(crate) captioners: Arc<crate::vlm::CaptionerRegistry>,
43    /// Prompt-injection guard. Always present; default config yields the
44    /// `moderate` output level with methods 1+2 active.
45    pub(crate) guard: std::sync::Arc<crate::guard::Guard>,
46    /// M9 fix C1: lazily-initialized headless renderer. The handler owns a
47    /// shared `OnceCell` so the first call requesting `headless.mode = On`
48    /// (or `Auto` when the SPA heuristic triggers) pays the
49    /// browser-launch cost; subsequent calls reuse the same `Arc<HeadlessRenderer>`.
50    /// `serve_stdio` keeps a clone for shutdown.
51    #[cfg(feature = "headless")]
52    pub(crate) headless_renderer:
53        Arc<tokio::sync::OnceCell<Arc<crate::fetcher::headless::HeadlessRenderer>>>,
54    tool_router: ToolRouter<Self>,
55}
56
57impl RoverHandler {
58    #[allow(clippy::too_many_arguments)]
59    pub fn new(
60        db: Db,
61        config: Arc<Config>,
62        client: reqwest::Client,
63        ssrf_level: SsrfLevel,
64        ssrf_project_root: Option<std::path::PathBuf>,
65        har_recorder: Option<Arc<crate::fetcher::har::HarRecorder>>,
66        pacer: Arc<Pacer>,
67        summarizer: Arc<crate::summarizer::SummarizerService>,
68        captioners: Arc<crate::vlm::CaptionerRegistry>,
69        guard: Arc<crate::guard::Guard>,
70        #[cfg(feature = "headless")] headless_renderer: Arc<
71            tokio::sync::OnceCell<Arc<crate::fetcher::headless::HeadlessRenderer>>,
72        >,
73    ) -> Self {
74        // Rewrite covered tools' descriptions to advertise, per override, whether
75        // the agent's `security` arg is currently honored based on config grants.
76        // rmcp's `#[tool_handler]` clones each route's `attr.description` when
77        // generating `list_tools`, so mutating the router map here is reflected.
78        let mut tool_router = Self::tool_router();
79        let note = guard.tool_security_note();
80        for name in ["fetch_tool", "summarize_tool", "get_metadata_tool"] {
81            if let Some(route) = tool_router.map.get_mut(name) {
82                let base = route.attr.description.clone().unwrap_or_default();
83                route.attr.description = Some(format!("{base} {note}").into());
84            }
85        }
86        if let Some(route) = tool_router.map.get_mut("batch_fetch_tool") {
87            let base = route.attr.description.clone().unwrap_or_default();
88            route.attr.description = Some(
89                format!("{base} Fetched content is prompt-injection guarded when you later read each URL via fetch.").into(),
90            );
91        }
92        Self {
93            db,
94            config,
95            client,
96            ssrf_level,
97            ssrf_project_root,
98            har_recorder,
99            pacer,
100            summarizer,
101            captioners,
102            guard,
103            #[cfg(feature = "headless")]
104            headless_renderer,
105            tool_router,
106        }
107    }
108}
109
110/// Resolve the tokenizer family from an optional wire-arg string, falling
111/// back to the config default. Returns [`crate::mcp::error::McpError::InvalidArgs`]
112/// for unknown family strings so both tools surface the same error code.
113pub(crate) fn resolve_tokenizer(
114    arg: Option<&str>,
115    cfg: &Config,
116) -> Result<crate::tokenizer::Tokenizer, crate::mcp::error::McpError> {
117    use std::str::FromStr;
118    match arg {
119        Some(s) => crate::tokenizer::Tokenizer::from_str(s)
120            .map_err(|e| crate::mcp::error::McpError::InvalidArgs(e.to_string())),
121        None => Ok(cfg.tokenizer.default),
122    }
123}
124
125#[tool_router]
126impl RoverHandler {
127    /// Fetch a URL and return cleaned Markdown with frontmatter.
128    #[tool(
129        description = "Fetch a URL and return cleaned Markdown with frontmatter. \
130                       Set count_only=true to return only token counts."
131    )]
132    pub async fn fetch_tool(
133        &self,
134        Parameters(args): Parameters<FetchArgs>,
135    ) -> Result<Json<FetchOutput>, ErrorData> {
136        match self.fetch_inner(args).await {
137            Ok(out) => Ok(Json(out)),
138            Err(e) => Err(into_error_data(e)),
139        }
140    }
141
142    /// Count tokens in either an inline `text` or a fetched `url`.
143    #[tool(description = "Count tokens for a URL or inline text. \
144                       mode=\"single\" (default) returns one token count. \
145                       mode=\"estimates\" returns four counts: raw_html, \
146                       extracted_md, summary_short (~250 tokens), summary_medium (~750 tokens). \
147                       Estimates mode requires url and uses the extractive backend.")]
148    pub async fn count_tokens_tool(
149        &self,
150        Parameters(args): Parameters<CountTokensArgs>,
151    ) -> Result<Json<crate::mcp::envelope::CountResponse>, ErrorData> {
152        match self.count_tokens_inner(args).await {
153            Ok(out) => Ok(Json(out)),
154            Err(e) => Err(into_error_data(e)),
155        }
156    }
157
158    /// Fetch a URL and return ONLY its structured metadata (no markdown body).
159    #[tool(description = "Fetch a URL and return only its structured metadata: \
160                       title, description, author, published/modified dates, \
161                       schema_types, image, canonical, language, extraction_quality.")]
162    pub async fn get_metadata_tool(
163        &self,
164        Parameters(args): Parameters<crate::mcp::tools::get_metadata::GetMetadataArgs>,
165    ) -> Result<Json<crate::mcp::envelope::MetadataResponse>, ErrorData> {
166        match self.get_metadata_inner(args).await {
167            Ok(out) => Ok(Json(out)),
168            Err(e) => Err(into_error_data(e)),
169        }
170    }
171
172    /// Apply summarization to a URL's cached or freshly-fetched markdown.
173    #[tool(
174        description = "Apply summarization to a URL. If the URL isn't cached, \
175                       Rover fetches it with default options first. Returns the \
176                       summary_md plus metadata including cache status, the \
177                       effective backend, and (when applicable) fallback details."
178    )]
179    pub async fn summarize_tool(
180        &self,
181        Parameters(args): Parameters<crate::mcp::tools::summarize::SummarizeArgs>,
182    ) -> Result<Json<crate::mcp::envelope::SummarizeResponse>, ErrorData> {
183        match self.summarize_inner(args).await {
184            Ok(out) => Ok(Json(out)),
185            Err(e) => Err(into_error_data(e)),
186        }
187    }
188
189    /// Fetch multiple URLs concurrently in the background.
190    #[tool(
191        description = "Fetch multiple URLs concurrently. Returns a task_id immediately; \
192                          use rover batch <id> --monitor to stream progress."
193    )]
194    pub async fn batch_fetch_tool(
195        &self,
196        Parameters(args): Parameters<crate::mcp::tools::batch_fetch::BatchFetchArgs>,
197    ) -> Result<Json<crate::mcp::envelope::TaskCreatedResponse>, ErrorData> {
198        match self.batch_fetch_inner(args).await {
199            Ok(out) => Ok(Json(out)),
200            Err(e) => Err(into_error_data(e)),
201        }
202    }
203}
204
205#[tool_handler(router = self.tool_router)]
206impl ServerHandler for RoverHandler {
207    fn get_info(&self) -> ServerInfo {
208        ServerInfo::new(ServerCapabilities::builder().enable_tools().build())
209            .with_server_info(rmcp::model::Implementation::new(
210                "rover",
211                env!("CARGO_PKG_VERSION"),
212            ))
213            .with_instructions(
214                "Web fetch & prep for LLM agents. \
215                 Tools: fetch, summarize, count_tokens, get_metadata, batch_fetch.",
216            )
217    }
218}
219
220fn into_error_data(err: crate::mcp::error::McpError) -> ErrorData {
221    use crate::mcp::error::McpError;
222    let is_user_error = matches!(
223        &err,
224        McpError::InvalidArgs(_)
225            | McpError::InvalidUrl(_)
226            | McpError::TooManyUrls { .. }
227            | McpError::EmptyUrlList
228            | McpError::Summarizer(
229                crate::summarizer::SummarizerError::NoSuchBackend { .. }
230                    | crate::summarizer::SummarizerError::InvalidRequest { .. }
231            ),
232    );
233    let r = crate::mcp::error::log_and_translate(err);
234    let code = if is_user_error {
235        rmcp::model::ErrorCode::INVALID_PARAMS
236    } else {
237        rmcp::model::ErrorCode::INTERNAL_ERROR
238    };
239    let message = format!("{}: {}", r.code, r.message);
240    let data = serde_json::to_value(&r).ok();
241    ErrorData::new(code, message, data)
242}