use rmcp::{
    handler::server::tool::ToolRouter,
    handler::server::wrapper::Parameters,
    model::*,
    tool, tool_handler, tool_router,
    ErrorData as McpError,
    ServerHandler,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tracing::{error, info};

use crate::{
    api::scrape::scrape_core_logic,
    crawler::{crawl_website, mapper},
    search::SearchProvider,
    types::{CrawlRequest, MapRequest, ScrapeRequest},
};
26
27#[derive(Debug, Clone, Deserialize, JsonSchema)]
33pub struct ScrapeParams {
34 pub url: String,
36
37 #[serde(default)]
40 pub formats: Option<Vec<String>>,
41
42 #[serde(default)]
45 pub engine: Option<String>,
46
47 #[serde(default)]
49 pub timeout_ms: Option<u64>,
50}
51
52#[derive(Debug, Clone, Deserialize, JsonSchema)]
54pub struct MapParams {
55 pub url: String,
57
58 #[serde(default)]
60 pub search: Option<String>,
61
62 #[serde(default)]
64 pub ignore_sitemap: Option<bool>,
65
66 #[serde(default)]
68 pub include_subdomains: Option<bool>,
69
70 #[serde(default)]
72 pub limit: Option<u32>,
73}
74
75#[derive(Debug, Clone, Deserialize, JsonSchema)]
77pub struct CrawlParams {
78 pub url: String,
80
81 #[serde(default)]
83 pub max_depth: Option<u32>,
84
85 #[serde(default)]
87 pub limit: Option<u32>,
88
89 #[serde(default)]
91 pub include_paths: Option<Vec<String>>,
92
93 #[serde(default)]
95 pub exclude_paths: Option<Vec<String>>,
96
97 #[serde(default)]
99 pub allow_backward_links: Option<bool>,
100
101 #[serde(default)]
103 pub allow_external_links: Option<bool>,
104}
105
106#[derive(Debug, Clone, Deserialize, JsonSchema)]
108pub struct SearchParams {
109 pub query: String,
111
112 #[serde(default)]
114 pub limit: Option<u32>,
115
116 #[serde(default)]
118 pub scrape_results: Option<bool>,
119}
120
121#[derive(Clone)]
127pub struct EssenceMcpServer {
128 tool_router: ToolRouter<Self>,
129}
130
131#[tool_router]
132impl EssenceMcpServer {
133 #[allow(clippy::new_without_default)]
135 pub fn new() -> Self {
136 Self {
137 tool_router: Self::tool_router(),
138 }
139 }
140
141 #[tool(description = "Scrape a single web page and return its content as Markdown (default), HTML, or other formats. Uses intelligent HTTP -> Browser fallback for reliability.")]
145 async fn scrape(
146 &self,
147 Parameters(params): Parameters<ScrapeParams>,
148 ) -> Result<CallToolResult, McpError> {
149 info!("MCP tool call: scrape url={}", params.url);
150
151 let request = ScrapeRequest {
152 url: params.url.clone(),
153 formats: params.formats.unwrap_or_else(|| vec!["markdown".to_string()]),
154 engine: params.engine.unwrap_or_else(|| "auto".to_string()),
155 timeout: params.timeout_ms.unwrap_or(30000),
156 ..ScrapeRequest::default()
157 };
158
159 match scrape_core_logic(&request).await {
160 Ok(response) => {
161 let json = serde_json::to_string_pretty(&response).map_err(|e| {
162 McpError::internal_error(
163 format!("Failed to serialize scrape response: {}", e),
164 None,
165 )
166 })?;
167 Ok(CallToolResult::success(vec![Content::text(json)]))
168 }
169 Err(e) => {
170 error!("MCP scrape error for {}: {}", params.url, e);
171 Ok(CallToolResult::error(vec![Content::text(format!(
172 "Scrape failed: {}",
173 e
174 ))]))
175 }
176 }
177 }
178
179 #[tool(description = "Discover URLs from a website via sitemaps and in-page link extraction. Returns a list of discovered URLs.")]
181 async fn map(
182 &self,
183 Parameters(params): Parameters<MapParams>,
184 ) -> Result<CallToolResult, McpError> {
185 info!("MCP tool call: map url={}", params.url);
186
187 let map_request = MapRequest {
188 url: params.url.clone(),
189 search: params.search,
190 ignore_sitemap: params.ignore_sitemap,
191 include_subdomains: params.include_subdomains.or(Some(true)),
192 limit: params.limit.or(Some(5000)),
193 };
194
195 match mapper::discover_urls(¶ms.url, &map_request).await {
196 Ok(links) => {
197 let result = serde_json::json!({
198 "success": true,
199 "count": links.len(),
200 "links": links
201 });
202 let json = serde_json::to_string_pretty(&result).map_err(|e| {
203 McpError::internal_error(
204 format!("Failed to serialize map response: {}", e),
205 None,
206 )
207 })?;
208 Ok(CallToolResult::success(vec![Content::text(json)]))
209 }
210 Err(e) => {
211 error!("MCP map error for {}: {}", params.url, e);
212 Ok(CallToolResult::error(vec![Content::text(format!(
213 "Map failed: {}",
214 e
215 ))]))
216 }
217 }
218 }
219
220 #[tool(description = "Crawl a website starting from a URL, following links up to a specified depth and page limit. Returns scraped content from all crawled pages.")]
222 async fn crawl(
223 &self,
224 Parameters(params): Parameters<CrawlParams>,
225 ) -> Result<CallToolResult, McpError> {
226 info!("MCP tool call: crawl url={}", params.url);
227
228 let crawl_request = CrawlRequest {
229 url: params.url.clone(),
230 max_depth: params.max_depth.unwrap_or(2),
231 limit: params.limit.unwrap_or(100),
232 include_paths: params.include_paths,
233 exclude_paths: params.exclude_paths,
234 allow_backward_links: params.allow_backward_links,
235 allow_external_links: params.allow_external_links,
236 ignore_sitemap: None,
237 detect_pagination: Some(true),
238 max_pagination_pages: Some(50),
239 use_parallel: None,
240 };
241
242 match crawl_website(&crawl_request).await {
243 Ok(documents) => {
244 let result = serde_json::json!({
245 "success": true,
246 "pages_crawled": documents.len(),
247 "data": documents
248 });
249 let json = serde_json::to_string_pretty(&result).map_err(|e| {
250 McpError::internal_error(
251 format!("Failed to serialize crawl response: {}", e),
252 None,
253 )
254 })?;
255 Ok(CallToolResult::success(vec![Content::text(json)]))
256 }
257 Err(e) => {
258 error!("MCP crawl error for {}: {}", params.url, e);
259 Ok(CallToolResult::error(vec![Content::text(format!(
260 "Crawl failed: {}",
261 e
262 ))]))
263 }
264 }
265 }
266
267 #[tool(description = "Search the web using DuckDuckGo and optionally scrape each result page for full content. Returns search results with titles, URLs, and snippets.")]
269 async fn search(
270 &self,
271 Parameters(params): Parameters<SearchParams>,
272 ) -> Result<CallToolResult, McpError> {
273 info!("MCP tool call: search query={}", params.query);
274
275 let provider = SearchProvider::new().map_err(|e| {
276 McpError::internal_error(
277 format!("Failed to create search provider: {}", e),
278 None,
279 )
280 })?;
281
282 let limit = params.limit.unwrap_or(10);
283
284 let mut results = provider
285 .search_duckduckgo(¶ms.query, limit)
286 .await
287 .map_err(|e| {
288 McpError::internal_error(
289 format!("Search failed: {}", e),
290 None,
291 )
292 })?;
293
294 if params.scrape_results.unwrap_or(false) {
296 info!("Scraping {} search results", results.len());
297 for result in &mut results {
298 let scrape_req = ScrapeRequest {
299 url: result.url.clone(),
300 formats: vec!["markdown".to_string()],
301 engine: "http".to_string(),
302 timeout: 10000,
303 only_main_content: true,
304 ..ScrapeRequest::default()
305 };
306 match scrape_core_logic(&scrape_req).await {
307 Ok(response) => {
308 if let Some(data) = response.data {
309 result.content = Some(data);
310 }
311 }
312 Err(e) => {
313 error!("Failed to scrape search result {}: {}", result.url, e);
314 }
315 }
316 }
317 }
318
319 let response = serde_json::json!({
320 "success": true,
321 "count": results.len(),
322 "data": results
323 });
324 let json = serde_json::to_string_pretty(&response).map_err(|e| {
325 McpError::internal_error(
326 format!("Failed to serialize search response: {}", e),
327 None,
328 )
329 })?;
330 Ok(CallToolResult::success(vec![Content::text(json)]))
331 }
332}
333
334#[tool_handler]
339impl ServerHandler for EssenceMcpServer {
340 fn get_info(&self) -> ServerInfo {
341 ServerInfo {
342 server_info: Implementation {
343 name: "essence".to_string(),
344 title: Some("Essence Web Retrieval Engine".to_string()),
345 version: env!("CARGO_PKG_VERSION").to_string(),
346 description: Some(
347 "Production-ready web retrieval engine with intelligent HTTP->Browser fallback, \
348 providing LLM-ready Markdown outputs. Supports scraping, crawling, URL discovery, \
349 and web search."
350 .to_string(),
351 ),
352 icons: None,
353 website_url: None,
354 },
355 capabilities: ServerCapabilities::builder()
356 .enable_tools()
357 .build(),
358 instructions: Some(
359 "Essence is a web retrieval engine. Use the 'scrape' tool to fetch a single page, \
360 'map' to discover URLs on a site, 'crawl' to traverse multiple pages, or 'search' \
361 to find pages via DuckDuckGo web search. All tools return structured JSON with \
362 Markdown content suitable for LLM consumption."
363 .to_string(),
364 ),
365 ..Default::default()
366 }
367 }
368}