use async_trait::async_trait;
use reqwest::{Client, Method, Response};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::Duration;
use tracing::{debug, info, warn};
use url::Url;
use crate::common::{
BaseServer, McpContent, McpServerBase, McpTool, McpToolRequest, McpToolResponse,
ServerCapabilities, ServerConfig,
};
use crate::{McpToolsError, Result};
/// MCP server exposing web-related tools: raw HTTP requests, web page
/// analysis, and URL structure analysis.
pub struct WebToolsServer {
    // Shared MCP server plumbing (config, base capabilities, stats).
    base: BaseServer,
    // Reused for all outgoing requests; reqwest pools connections per client.
    client: Client,
}
/// Parameters for an outgoing HTTP request, as accepted by the
/// `http_request` tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HttpRequest {
    /// Target URL (must parse as an absolute URL).
    pub url: String,
    /// HTTP method name; matched case-insensitively against a fixed set
    /// (GET, POST, PUT, DELETE, HEAD, PATCH).
    pub method: String,
    /// Extra request headers as key-value pairs.
    pub headers: HashMap<String, String>,
    /// Optional request body (sent verbatim).
    pub body: Option<String>,
    /// Optional per-request timeout in seconds; overrides the client default.
    pub timeout: Option<u64>,
    // NOTE(review): accepted but currently not applied — redirect policy in
    // reqwest is configured on the Client, not per request.
    pub follow_redirects: bool,
}
/// Collected result of an HTTP request, with the body fully read into memory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HttpResponse {
    /// Numeric status code (e.g. 200).
    pub status: u16,
    /// Canonical reason phrase for the status, or "Unknown".
    pub status_text: String,
    /// Response headers; entries with non-UTF-8 values are omitted.
    pub headers: HashMap<String, String>,
    /// Response body decoded as text.
    pub body: String,
    /// Final URL after any redirects followed by the client.
    pub url: String,
    /// Value of the Content-Type header, if present.
    pub content_type: Option<String>,
    /// Content-Length as reported by the response, if known.
    pub content_length: Option<u64>,
}
/// Metadata extracted from a fetched HTML page by `analyze_webpage`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebPageAnalysis {
    /// Final URL of the page (after redirects).
    pub url: String,
    /// Contents of the <title> tag, if found.
    pub title: Option<String>,
    /// Contents of the description meta tag, if found.
    pub description: Option<String>,
    /// Comma-separated keywords meta tag, split and trimmed.
    pub keywords: Vec<String>,
    /// All href="..." values found in the document (fragments excluded).
    pub links: Vec<String>,
    /// All src="..." values found in the document.
    pub images: Vec<String>,
    // NOTE(review): currently always empty — see extract_forms stub.
    pub forms: Vec<FormInfo>,
    // NOTE(review): currently always empty — see extract_meta_tags stub.
    pub meta_tags: HashMap<String, String>,
    /// Rough token count over the raw HTML (tokens starting with '<' skipped).
    pub word_count: u32,
    /// Wall-clock time for fetch + parse, in milliseconds.
    pub load_time: u64,
}
/// Description of an HTML form found on a page.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormInfo {
    /// Form action attribute (submit target), if present.
    pub action: Option<String>,
    /// Form method attribute (e.g. "GET", "POST").
    pub method: String,
    /// Input fields declared inside the form.
    pub fields: Vec<FormField>,
}
/// A single input field inside an HTML form.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormField {
    /// The field's name attribute, if present.
    pub name: Option<String>,
    /// The field's type attribute (e.g. "text", "password").
    pub field_type: String,
    /// Whether the field carries the `required` attribute.
    pub required: bool,
    /// Placeholder text, if present.
    pub placeholder: Option<String>,
}
/// Structural breakdown of a URL produced by `analyze_url`.
///
/// When parsing fails, `is_valid` is false and all components are empty/None.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UrlAnalysis {
    /// The input URL string, unchanged.
    pub url: String,
    /// Whether the input parsed as a valid URL.
    pub is_valid: bool,
    /// URL scheme (e.g. "https"), if parsed.
    pub scheme: Option<String>,
    /// Host component, if present.
    pub host: Option<String>,
    /// Explicit port, if one was specified.
    pub port: Option<u16>,
    /// Path component (empty string for invalid URLs).
    pub path: String,
    /// Query string without the leading '?', if present.
    pub query: Option<String>,
    /// Fragment without the leading '#', if present.
    pub fragment: Option<String>,
    /// Derived information about the host/domain.
    pub domain_info: DomainInfo,
}
/// Naive decomposition of a URL host into domain parts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DomainInfo {
    /// Full host string (may be a hostname or an IP address).
    pub domain: String,
    /// First dotted label when the host has more than two labels.
    pub subdomain: Option<String>,
    // NOTE(review): this is the last dotted label, not a true public-suffix
    // TLD (e.g. "co.uk" would yield "uk").
    pub tld: Option<String>,
    /// True when the host parses as an IPv4/IPv6 address.
    pub is_ip: bool,
}
impl WebToolsServer {
pub async fn new(config: ServerConfig) -> Result<Self> {
let base = BaseServer::new(config).await?;
let client = Client::builder()
.timeout(Duration::from_secs(30))
.user_agent("MCP-Tools/1.0")
.build()
.map_err(|e| McpToolsError::Server(format!("Failed to create HTTP client: {}", e)))?;
Ok(Self { base, client })
}
async fn http_request(&self, request: HttpRequest) -> Result<HttpResponse> {
debug!("Making HTTP request to: {}", request.url);
let url = Url::parse(&request.url)
.map_err(|e| McpToolsError::Server(format!("Invalid URL: {}", e)))?;
let method = match request.method.to_uppercase().as_str() {
"GET" => Method::GET,
"POST" => Method::POST,
"PUT" => Method::PUT,
"DELETE" => Method::DELETE,
"HEAD" => Method::HEAD,
"PATCH" => Method::PATCH,
_ => {
return Err(McpToolsError::Server(format!(
"Unsupported HTTP method: {}",
request.method
)))
}
};
let mut req_builder = self.client.request(method, url);
for (key, value) in request.headers {
req_builder = req_builder.header(&key, &value);
}
if let Some(body) = request.body {
req_builder = req_builder.body(body);
}
if let Some(timeout_secs) = request.timeout {
req_builder = req_builder.timeout(Duration::from_secs(timeout_secs));
}
let start_time = std::time::Instant::now();
let response = req_builder
.send()
.await
.map_err(|e| McpToolsError::Server(format!("HTTP request failed: {}", e)))?;
let status = response.status().as_u16();
let status_text = response
.status()
.canonical_reason()
.unwrap_or("Unknown")
.to_string();
let final_url = response.url().to_string();
let mut headers = HashMap::new();
for (key, value) in response.headers() {
if let Ok(value_str) = value.to_str() {
headers.insert(key.to_string(), value_str.to_string());
}
}
let content_type = response
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok())
.map(|s| s.to_string());
let content_length = response.content_length();
let body = response
.text()
.await
.map_err(|e| McpToolsError::Server(format!("Failed to read response body: {}", e)))?;
Ok(HttpResponse {
status,
status_text,
headers,
body,
url: final_url,
content_type,
content_length,
})
}
async fn analyze_webpage(&self, url: &str) -> Result<WebPageAnalysis> {
debug!("Analyzing webpage: {}", url);
let request = HttpRequest {
url: url.to_string(),
method: "GET".to_string(),
headers: HashMap::new(),
body: None,
timeout: Some(30),
follow_redirects: true,
};
let start_time = std::time::Instant::now();
let response = self.http_request(request).await?;
let load_time = start_time.elapsed().as_millis() as u64;
let html = &response.body;
let title = self.extract_html_tag(html, "title");
let description = self.extract_meta_content(html, "description");
let keywords_str = self
.extract_meta_content(html, "keywords")
.unwrap_or_default();
let keywords: Vec<String> = keywords_str
.split(',')
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.collect();
let links = self.extract_links(html);
let images = self.extract_images(html);
let forms = self.extract_forms(html);
let meta_tags = self.extract_meta_tags(html);
let word_count = html
.split_whitespace()
.filter(|word| !word.starts_with('<'))
.count() as u32;
Ok(WebPageAnalysis {
url: response.url,
title,
description,
keywords,
links,
images,
forms,
meta_tags,
word_count,
load_time,
})
}
async fn analyze_url(&self, url_str: &str) -> Result<UrlAnalysis> {
debug!("Analyzing URL: {}", url_str);
match Url::parse(url_str) {
Ok(url) => {
let domain = url.host_str().unwrap_or("").to_string();
let domain_parts: Vec<&str> = domain.split('.').collect();
let (subdomain, tld) = if domain_parts.len() > 2 {
(
Some(domain_parts[0].to_string()),
Some(domain_parts.last().unwrap().to_string()),
)
} else {
(None, domain_parts.last().map(|s| s.to_string()))
};
let is_ip = domain.parse::<std::net::IpAddr>().is_ok();
Ok(UrlAnalysis {
url: url_str.to_string(),
is_valid: true,
scheme: Some(url.scheme().to_string()),
host: url.host_str().map(|s| s.to_string()),
port: url.port(),
path: url.path().to_string(),
query: url.query().map(|s| s.to_string()),
fragment: url.fragment().map(|s| s.to_string()),
domain_info: DomainInfo {
domain,
subdomain,
tld,
is_ip,
},
})
}
Err(_) => Ok(UrlAnalysis {
url: url_str.to_string(),
is_valid: false,
scheme: None,
host: None,
port: None,
path: String::new(),
query: None,
fragment: None,
domain_info: DomainInfo {
domain: String::new(),
subdomain: None,
tld: None,
is_ip: false,
},
}),
}
}
fn extract_html_tag(&self, html: &str, tag: &str) -> Option<String> {
let start_tag = format!("<{}>", tag);
let end_tag = format!("</{}>", tag);
if let Some(start) = html.find(&start_tag) {
if let Some(end) = html[start..].find(&end_tag) {
let content = &html[start + start_tag.len()..start + end];
return Some(content.trim().to_string());
}
}
None
}
fn extract_meta_content(&self, html: &str, name: &str) -> Option<String> {
let pattern = format!(r#"<meta[^>]*name="{}"[^>]*content="([^"]*)"#, name);
if let Some(start) = html.find(&format!(r#"name="{}""#, name)) {
if let Some(content_start) = html[start..].find(r#"content=""#) {
let content_pos = start + content_start + 9; if let Some(content_end) = html[content_pos..].find('"') {
return Some(html[content_pos..content_pos + content_end].to_string());
}
}
}
None
}
fn extract_links(&self, html: &str) -> Vec<String> {
let mut links = Vec::new();
let mut pos = 0;
while let Some(href_pos) = html[pos..].find("href=\"") {
let start = pos + href_pos + 6; if let Some(end_pos) = html[start..].find('"') {
let link = html[start..start + end_pos].to_string();
if !link.is_empty() && !link.starts_with('#') {
links.push(link);
}
pos = start + end_pos;
} else {
break;
}
}
links
}
fn extract_images(&self, html: &str) -> Vec<String> {
let mut images = Vec::new();
let mut pos = 0;
while let Some(src_pos) = html[pos..].find("src=\"") {
let start = pos + src_pos + 5; if let Some(end_pos) = html[start..].find('"') {
let image = html[start..start + end_pos].to_string();
if !image.is_empty() {
images.push(image);
}
pos = start + end_pos;
} else {
break;
}
}
images
}
fn extract_forms(&self, _html: &str) -> Vec<FormInfo> {
Vec::new()
}
fn extract_meta_tags(&self, _html: &str) -> HashMap<String, String> {
HashMap::new()
}
}
#[async_trait]
impl McpServerBase for WebToolsServer {
    /// Returns the base server capabilities with the tool list replaced by
    /// the four web tools this server implements.
    async fn get_capabilities(&self) -> Result<ServerCapabilities> {
        let mut capabilities = self.base.get_capabilities().await?;
        let web_tools = vec![
            McpTool {
                name: "http_request".to_string(),
                description: "Make HTTP requests (GET, POST, PUT, DELETE) to web endpoints"
                    .to_string(),
                input_schema: serde_json::json!({
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "Target URL for the HTTP request"
                        },
                        "method": {
                            "type": "string",
                            "description": "HTTP method (GET, POST, PUT, DELETE, HEAD, PATCH)",
                            "enum": ["GET", "POST", "PUT", "DELETE", "HEAD", "PATCH"],
                            "default": "GET"
                        },
                        "headers": {
                            "type": "object",
                            "description": "HTTP headers as key-value pairs",
                            "additionalProperties": {"type": "string"}
                        },
                        "body": {
                            "type": "string",
                            "description": "Request body (for POST, PUT, PATCH methods)"
                        },
                        "timeout": {
                            "type": "integer",
                            "description": "Request timeout in seconds (default: 30)",
                            "minimum": 1,
                            "maximum": 300
                        },
                        "follow_redirects": {
                            "type": "boolean",
                            "description": "Whether to follow HTTP redirects (default: true)"
                        }
                    },
                    "required": ["url"]
                }),
                category: "web".to_string(),
                requires_permission: true,
                permissions: vec!["network.http".to_string()],
            },
            McpTool {
                name: "analyze_webpage".to_string(),
                description:
                    "Analyze a web page and extract metadata, links, images, and other information"
                        .to_string(),
                input_schema: serde_json::json!({
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "URL of the web page to analyze"
                        }
                    },
                    "required": ["url"]
                }),
                category: "web".to_string(),
                requires_permission: true,
                permissions: vec!["network.http".to_string()],
            },
            McpTool {
                name: "analyze_url".to_string(),
                description:
                    "Analyze URL structure and extract components (scheme, host, path, query, etc.)"
                        .to_string(),
                input_schema: serde_json::json!({
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "URL to analyze"
                        }
                    },
                    "required": ["url"]
                }),
                category: "web".to_string(),
                // Pure parsing, no network access — no permission required.
                requires_permission: false,
                permissions: vec![],
            },
            McpTool {
                name: "fetch_content".to_string(),
                description: "Fetch content from a URL with automatic content type detection"
                    .to_string(),
                input_schema: serde_json::json!({
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "URL to fetch content from"
                        },
                        "headers": {
                            "type": "object",
                            "description": "Additional HTTP headers",
                            "additionalProperties": {"type": "string"}
                        },
                        "timeout": {
                            "type": "integer",
                            "description": "Request timeout in seconds (default: 30)"
                        }
                    },
                    "required": ["url"]
                }),
                category: "web".to_string(),
                requires_permission: true,
                permissions: vec!["network.http".to_string()],
            },
        ];
        capabilities.tools = web_tools;
        Ok(capabilities)
    }

    /// Dispatches a tool request to the matching implementation.
    ///
    /// Each arm validates its arguments, runs the operation, and returns a
    /// human-readable summary in `content` plus the full structured result
    /// under a tool-specific key in `metadata`.
    ///
    /// # Errors
    /// Returns `McpToolsError::Server` for a missing `url` argument, an
    /// unknown tool name, or any failure of the underlying operation.
    async fn handle_tool_request(&self, request: McpToolRequest) -> Result<McpToolResponse> {
        info!("Handling Web Tools request: {}", request.tool);
        match request.tool.as_str() {
            "http_request" => {
                debug!("Making HTTP request");
                let url = request
                    .arguments
                    .get("url")
                    .and_then(|v| v.as_str())
                    .ok_or_else(|| McpToolsError::Server("Missing 'url' parameter".to_string()))?;
                let method = request
                    .arguments
                    .get("method")
                    .and_then(|v| v.as_str())
                    .unwrap_or("GET");
                // Non-string header values are silently dropped.
                let headers = request
                    .arguments
                    .get("headers")
                    .and_then(|v| v.as_object())
                    .map(|obj| {
                        obj.iter()
                            .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
                            .collect()
                    })
                    .unwrap_or_default();
                let body = request
                    .arguments
                    .get("body")
                    .and_then(|v| v.as_str())
                    .map(|s| s.to_string());
                let timeout = request.arguments.get("timeout").and_then(|v| v.as_u64());
                let follow_redirects = request
                    .arguments
                    .get("follow_redirects")
                    .and_then(|v| v.as_bool())
                    .unwrap_or(true);
                let http_request = HttpRequest {
                    url: url.to_string(),
                    method: method.to_string(),
                    headers,
                    body,
                    timeout,
                    follow_redirects,
                };
                let response = self.http_request(http_request).await?;
                let content_text = format!(
                    "HTTP Request Complete\n\
                     Status: {} {}\n\
                     URL: {}\n\
                     Content-Type: {}\n\
                     Content-Length: {} bytes",
                    response.status,
                    response.status_text,
                    response.url,
                    response.content_type.as_deref().unwrap_or("unknown"),
                    // Fall back to the body's byte length when the server
                    // did not report Content-Length.
                    response
                        .content_length
                        .unwrap_or(response.body.len() as u64)
                );
                let mut metadata = HashMap::new();
                metadata.insert("http_response".to_string(), serde_json::to_value(response)?);
                Ok(McpToolResponse {
                    id: request.id,
                    content: vec![McpContent::text(content_text)],
                    is_error: false,
                    error: None,
                    metadata,
                })
            }
            "analyze_webpage" => {
                debug!("Analyzing webpage");
                let url = request
                    .arguments
                    .get("url")
                    .and_then(|v| v.as_str())
                    .ok_or_else(|| McpToolsError::Server("Missing 'url' parameter".to_string()))?;
                let analysis = self.analyze_webpage(url).await?;
                let content_text = format!(
                    "Web Page Analysis Complete\n\
                     URL: {}\n\
                     Title: {}\n\
                     Description: {}\n\
                     Links Found: {}\n\
                     Images Found: {}\n\
                     Word Count: {}\n\
                     Load Time: {}ms",
                    analysis.url,
                    analysis.title.as_deref().unwrap_or("None"),
                    analysis.description.as_deref().unwrap_or("None"),
                    analysis.links.len(),
                    analysis.images.len(),
                    analysis.word_count,
                    analysis.load_time
                );
                let mut metadata = HashMap::new();
                metadata.insert(
                    "webpage_analysis".to_string(),
                    serde_json::to_value(analysis)?,
                );
                Ok(McpToolResponse {
                    id: request.id,
                    content: vec![McpContent::text(content_text)],
                    is_error: false,
                    error: None,
                    metadata,
                })
            }
            "analyze_url" => {
                debug!("Analyzing URL structure");
                let url = request
                    .arguments
                    .get("url")
                    .and_then(|v| v.as_str())
                    .ok_or_else(|| McpToolsError::Server("Missing 'url' parameter".to_string()))?;
                let analysis = self.analyze_url(url).await?;
                let content_text = format!(
                    "URL Analysis Complete\n\
                     URL: {}\n\
                     Valid: {}\n\
                     Scheme: {}\n\
                     Host: {}\n\
                     Port: {}\n\
                     Path: {}\n\
                     Domain: {}",
                    analysis.url,
                    analysis.is_valid,
                    analysis.scheme.as_deref().unwrap_or("None"),
                    analysis.host.as_deref().unwrap_or("None"),
                    analysis
                        .port
                        .map(|p| p.to_string())
                        .as_deref()
                        .unwrap_or("None"),
                    analysis.path,
                    analysis.domain_info.domain
                );
                let mut metadata = HashMap::new();
                metadata.insert("url_analysis".to_string(), serde_json::to_value(analysis)?);
                Ok(McpToolResponse {
                    id: request.id,
                    content: vec![McpContent::text(content_text)],
                    is_error: false,
                    error: None,
                    metadata,
                })
            }
            "fetch_content" => {
                debug!("Fetching content from URL");
                let url = request
                    .arguments
                    .get("url")
                    .and_then(|v| v.as_str())
                    .ok_or_else(|| McpToolsError::Server("Missing 'url' parameter".to_string()))?;
                let headers = request
                    .arguments
                    .get("headers")
                    .and_then(|v| v.as_object())
                    .map(|obj| {
                        obj.iter()
                            .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
                            .collect()
                    })
                    .unwrap_or_default();
                let timeout = request.arguments.get("timeout").and_then(|v| v.as_u64());
                let http_request = HttpRequest {
                    url: url.to_string(),
                    method: "GET".to_string(),
                    headers,
                    body: None,
                    timeout,
                    follow_redirects: true,
                };
                let response = self.http_request(http_request).await?;
                // Preview at most ~1000 bytes of the body. Slicing at a fixed
                // byte offset (`&body[..1000]`) panics when the offset lands
                // inside a multi-byte UTF-8 sequence, so back up to the
                // nearest char boundary first.
                let body_preview = if response.body.len() > 1000 {
                    let mut end = 1000;
                    while !response.body.is_char_boundary(end) {
                        end -= 1;
                    }
                    format!("{}...", &response.body[..end])
                } else {
                    response.body.clone()
                };
                let content_text = format!(
                    "Content Fetched Successfully\n\
                     URL: {}\n\
                     Status: {}\n\
                     Content-Type: {}\n\
                     Size: {} bytes\n\n{}",
                    response.url,
                    response.status,
                    response.content_type.as_deref().unwrap_or("unknown"),
                    response.body.len(),
                    body_preview
                );
                let mut metadata = HashMap::new();
                metadata.insert(
                    "fetched_content".to_string(),
                    serde_json::to_value(response)?,
                );
                Ok(McpToolResponse {
                    id: request.id,
                    content: vec![McpContent::text(content_text)],
                    is_error: false,
                    error: None,
                    metadata,
                })
            }
            _ => {
                warn!("Unknown Web Tools request: {}", request.tool);
                Err(McpToolsError::Server(format!(
                    "Unknown Web Tools request: {}",
                    request.tool
                )))
            }
        }
    }

    /// Delegates statistics reporting to the base server.
    async fn get_stats(&self) -> Result<crate::common::ServerStats> {
        self.base.get_stats().await
    }

    /// No additional state to set up beyond what `new` already built.
    async fn initialize(&mut self) -> Result<()> {
        info!("Initializing Web Tools MCP Server");
        Ok(())
    }

    /// No resources to release; the HTTP client shuts down on drop.
    async fn shutdown(&mut self) -> Result<()> {
        info!("Shutting down Web Tools MCP Server");
        Ok(())
    }
}