use rmcp::{
ServerHandler,
handler::server::tool::ToolRouter,
model::*,
tool, tool_handler, tool_router,
handler::server::wrapper::Parameters,
ErrorData as McpError,
};
use schemars::JsonSchema;
use serde::Deserialize;
use tracing::{error, info};
use crate::{
api::scrape::scrape_core_logic,
crawler::{crawl_website, mapper},
search::SearchProvider,
types::{
CrawlRequest, MapRequest, ScrapeRequest,
},
};
/// Arguments accepted by the `scrape` tool.
///
/// The `///` docs on this type and its fields are picked up by `schemars` and
/// published as `description` entries in the tool's input schema.
#[derive(Debug, Clone, Deserialize, JsonSchema)]
pub struct ScrapeParams {
    /// URL of the page to scrape.
    pub url: String,
    /// Output formats to return. Defaults to `["markdown"]` when omitted.
    #[serde(default)]
    pub formats: Option<Vec<String>>,
    /// Scraping engine to use. Defaults to `"auto"` when omitted.
    #[serde(default)]
    pub engine: Option<String>,
    /// Request timeout in milliseconds. Defaults to `30000` when omitted.
    #[serde(default)]
    pub timeout_ms: Option<u64>,
}
/// Arguments accepted by the `map` tool.
///
/// The `///` docs on this type and its fields are picked up by `schemars` and
/// published as `description` entries in the tool's input schema.
#[derive(Debug, Clone, Deserialize, JsonSchema)]
pub struct MapParams {
    /// URL of the website whose links should be discovered.
    pub url: String,
    /// Optional search term, passed through unchanged to the URL mapper.
    #[serde(default)]
    pub search: Option<String>,
    /// Whether to ignore the site's sitemap. Passed through unchanged; no default is applied here.
    #[serde(default)]
    pub ignore_sitemap: Option<bool>,
    /// Whether to include subdomains. Defaults to `true` when omitted.
    #[serde(default)]
    pub include_subdomains: Option<bool>,
    /// Limit passed to the URL mapper. Defaults to `5000` when omitted.
    #[serde(default)]
    pub limit: Option<u32>,
}
/// Arguments accepted by the `crawl` tool.
///
/// The `///` docs on this type and its fields are picked up by `schemars` and
/// published as `description` entries in the tool's input schema.
#[derive(Debug, Clone, Deserialize, JsonSchema)]
pub struct CrawlParams {
    /// URL the crawl starts from.
    pub url: String,
    /// Maximum link depth to follow. Defaults to `2` when omitted.
    #[serde(default)]
    pub max_depth: Option<u32>,
    /// Page limit for the crawl. Defaults to `100` when omitted.
    #[serde(default)]
    pub limit: Option<u32>,
    /// Paths to include. Passed through unchanged to the crawler.
    #[serde(default)]
    pub include_paths: Option<Vec<String>>,
    /// Paths to exclude. Passed through unchanged to the crawler.
    #[serde(default)]
    pub exclude_paths: Option<Vec<String>>,
    /// Backward-link setting. Passed through unchanged; no default is applied here.
    #[serde(default)]
    pub allow_backward_links: Option<bool>,
    /// External-link setting. Passed through unchanged; no default is applied here.
    #[serde(default)]
    pub allow_external_links: Option<bool>,
}
/// Arguments accepted by the `search` tool.
///
/// The `///` docs on this type and its fields are picked up by `schemars` and
/// published as `description` entries in the tool's input schema.
#[derive(Debug, Clone, Deserialize, JsonSchema)]
pub struct SearchParams {
    /// Search query text.
    pub query: String,
    /// Result limit passed to the search provider. Defaults to `10` when omitted.
    #[serde(default)]
    pub limit: Option<u32>,
    /// When `true`, each result page is also scraped (HTTP engine, Markdown output)
    /// and the scraped data is attached to the result. Defaults to `false`.
    #[serde(default)]
    pub scrape_results: Option<bool>,
}
/// MCP server type that carries the `scrape`, `map`, `crawl` and `search` tools.
///
/// Construct it with `EssenceMcpServer::new()`.
#[derive(Clone)]
pub struct EssenceMcpServer {
// Tool router; `new()` fills it from `Self::tool_router()`.
tool_router: ToolRouter<Self>,
}
#[tool_router]
impl EssenceMcpServer {
#[allow(clippy::new_without_default)]
pub fn new() -> Self {
Self {
tool_router: Self::tool_router(),
}
}
#[tool(description = "Scrape a single web page and return its content as Markdown (default), HTML, or other formats. Uses intelligent HTTP -> Browser fallback for reliability.")]
async fn scrape(
&self,
Parameters(params): Parameters<ScrapeParams>,
) -> Result<CallToolResult, McpError> {
info!("MCP tool call: scrape url={}", params.url);
let request = ScrapeRequest {
url: params.url.clone(),
formats: params.formats.unwrap_or_else(|| vec!["markdown".to_string()]),
engine: params.engine.unwrap_or_else(|| "auto".to_string()),
timeout: params.timeout_ms.unwrap_or(30000),
..ScrapeRequest::default()
};
match scrape_core_logic(&request).await {
Ok(response) => {
let json = serde_json::to_string_pretty(&response).map_err(|e| {
McpError::internal_error(
format!("Failed to serialize scrape response: {}", e),
None,
)
})?;
Ok(CallToolResult::success(vec![Content::text(json)]))
}
Err(e) => {
error!("MCP scrape error for {}: {}", params.url, e);
Ok(CallToolResult::error(vec![Content::text(format!(
"Scrape failed: {}",
e
))]))
}
}
}
#[tool(description = "Discover URLs from a website via sitemaps and in-page link extraction. Returns a list of discovered URLs.")]
async fn map(
&self,
Parameters(params): Parameters<MapParams>,
) -> Result<CallToolResult, McpError> {
info!("MCP tool call: map url={}", params.url);
let map_request = MapRequest {
url: params.url.clone(),
search: params.search,
ignore_sitemap: params.ignore_sitemap,
include_subdomains: params.include_subdomains.or(Some(true)),
limit: params.limit.or(Some(5000)),
};
match mapper::discover_urls(¶ms.url, &map_request).await {
Ok(links) => {
let result = serde_json::json!({
"success": true,
"count": links.len(),
"links": links
});
let json = serde_json::to_string_pretty(&result).map_err(|e| {
McpError::internal_error(
format!("Failed to serialize map response: {}", e),
None,
)
})?;
Ok(CallToolResult::success(vec![Content::text(json)]))
}
Err(e) => {
error!("MCP map error for {}: {}", params.url, e);
Ok(CallToolResult::error(vec![Content::text(format!(
"Map failed: {}",
e
))]))
}
}
}
#[tool(description = "Crawl a website starting from a URL, following links up to a specified depth and page limit. Returns scraped content from all crawled pages.")]
async fn crawl(
&self,
Parameters(params): Parameters<CrawlParams>,
) -> Result<CallToolResult, McpError> {
info!("MCP tool call: crawl url={}", params.url);
let crawl_request = CrawlRequest {
url: params.url.clone(),
max_depth: params.max_depth.unwrap_or(2),
limit: params.limit.unwrap_or(100),
include_paths: params.include_paths,
exclude_paths: params.exclude_paths,
allow_backward_links: params.allow_backward_links,
allow_external_links: params.allow_external_links,
ignore_sitemap: None,
detect_pagination: Some(true),
max_pagination_pages: Some(50),
use_parallel: None,
};
match crawl_website(&crawl_request).await {
Ok(documents) => {
let result = serde_json::json!({
"success": true,
"pages_crawled": documents.len(),
"data": documents
});
let json = serde_json::to_string_pretty(&result).map_err(|e| {
McpError::internal_error(
format!("Failed to serialize crawl response: {}", e),
None,
)
})?;
Ok(CallToolResult::success(vec![Content::text(json)]))
}
Err(e) => {
error!("MCP crawl error for {}: {}", params.url, e);
Ok(CallToolResult::error(vec![Content::text(format!(
"Crawl failed: {}",
e
))]))
}
}
}
#[tool(description = "Search the web using DuckDuckGo and optionally scrape each result page for full content. Returns search results with titles, URLs, and snippets.")]
async fn search(
&self,
Parameters(params): Parameters<SearchParams>,
) -> Result<CallToolResult, McpError> {
info!("MCP tool call: search query={}", params.query);
let provider = SearchProvider::new().map_err(|e| {
McpError::internal_error(
format!("Failed to create search provider: {}", e),
None,
)
})?;
let limit = params.limit.unwrap_or(10);
let mut results = provider
.search_duckduckgo(¶ms.query, limit)
.await
.map_err(|e| {
McpError::internal_error(
format!("Search failed: {}", e),
None,
)
})?;
if params.scrape_results.unwrap_or(false) {
info!("Scraping {} search results", results.len());
for result in &mut results {
let scrape_req = ScrapeRequest {
url: result.url.clone(),
formats: vec!["markdown".to_string()],
engine: "http".to_string(),
timeout: 10000,
only_main_content: true,
..ScrapeRequest::default()
};
match scrape_core_logic(&scrape_req).await {
Ok(response) => {
if let Some(data) = response.data {
result.content = Some(data);
}
}
Err(e) => {
error!("Failed to scrape search result {}: {}", result.url, e);
}
}
}
}
let response = serde_json::json!({
"success": true,
"count": results.len(),
"data": results
});
let json = serde_json::to_string_pretty(&response).map_err(|e| {
McpError::internal_error(
format!("Failed to serialize search response: {}", e),
None,
)
})?;
Ok(CallToolResult::success(vec![Content::text(json)]))
}
}
#[tool_handler]
impl ServerHandler for EssenceMcpServer {
    /// Builds the server's self-description: implementation metadata, the
    /// tools capability, and usage instructions. All other `ServerInfo`
    /// fields keep their defaults.
    fn get_info(&self) -> ServerInfo {
        // Line continuations (`\`) drop the newline and the next line's leading
        // whitespace, so each of these is a single-line string at runtime.
        let description = "Production-ready web retrieval engine with intelligent HTTP->Browser fallback, \
            providing LLM-ready Markdown outputs. Supports scraping, crawling, URL discovery, \
            and web search.";
        let instructions = "Essence is a web retrieval engine. Use the 'scrape' tool to fetch a single page, \
            'map' to discover URLs on a site, 'crawl' to traverse multiple pages, or 'search' \
            to find pages via DuckDuckGo web search. All tools return structured JSON with \
            Markdown content suitable for LLM consumption.";

        let server_info = Implementation {
            name: String::from("essence"),
            title: Some(String::from("Essence Web Retrieval Engine")),
            // Version is taken from the crate's Cargo metadata at compile time.
            version: String::from(env!("CARGO_PKG_VERSION")),
            description: Some(String::from(description)),
            icons: None,
            website_url: None,
        };
        let capabilities = ServerCapabilities::builder().enable_tools().build();

        ServerInfo {
            server_info,
            capabilities,
            instructions: Some(String::from(instructions)),
            ..Default::default()
        }
    }
}