1use async_trait::async_trait;
11use reqwest::{Client, Method, Response};
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14use std::time::Duration;
15use tracing::{debug, info, warn};
16use url::Url;
17
18use crate::common::{
19 BaseServer, McpContent, McpServerBase, McpTool, McpToolRequest, McpToolResponse,
20 ServerCapabilities, ServerConfig,
21};
22use crate::{McpToolsError, Result};
23
/// MCP server exposing web-related tools: raw HTTP requests, web page
/// analysis, and URL structure analysis.
pub struct WebToolsServer {
    /// Shared server plumbing (config, base capabilities, stats).
    base: BaseServer,
    /// Reused HTTP client, built in `new` with a 30 s timeout and the
    /// "MCP-Tools/1.0" user agent.
    client: Client,
}
29
/// Parameters for an outgoing HTTP request issued by `WebToolsServer::http_request`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HttpRequest {
    /// Target URL; must be parseable by `url::Url::parse`.
    pub url: String,
    /// HTTP method name (GET, POST, PUT, DELETE, HEAD, PATCH; case-insensitive).
    pub method: String,
    /// Additional request headers as key/value pairs.
    pub headers: HashMap<String, String>,
    /// Optional raw request body.
    pub body: Option<String>,
    /// Optional per-request timeout in seconds (client default is 30 s).
    pub timeout: Option<u64>,
    /// Whether HTTP redirects should be followed.
    pub follow_redirects: bool,
}
40
/// Captured result of an HTTP request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HttpResponse {
    /// Numeric HTTP status code (e.g. 200, 404).
    pub status: u16,
    /// Canonical reason phrase for the status, or "Unknown".
    pub status_text: String,
    /// Response headers; values that are not valid UTF-8 are dropped.
    pub headers: HashMap<String, String>,
    /// Response body decoded as text.
    pub body: String,
    /// Final URL reported by the client (after any redirects it followed).
    pub url: String,
    /// Value of the `content-type` header, if present.
    pub content_type: Option<String>,
    /// Content length as reported by the client, if known.
    pub content_length: Option<u64>,
}
52
/// Summary of a fetched web page, produced by `analyze_webpage`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebPageAnalysis {
    /// Final URL of the page (after redirects).
    pub url: String,
    /// Text of the first `<title>` element, if found.
    pub title: Option<String>,
    /// Content of the `<meta name="description">` tag, if found.
    pub description: Option<String>,
    /// Entries from the comma-separated `<meta name="keywords">` tag.
    pub keywords: Vec<String>,
    /// All non-empty, non-fragment `href` attribute values, in document order.
    pub links: Vec<String>,
    /// All non-empty `src` attribute values, in document order.
    pub images: Vec<String>,
    /// Forms found on the page (currently always empty — `extract_forms` is a stub).
    pub forms: Vec<FormInfo>,
    /// Meta tags found on the page (currently always empty — `extract_meta_tags` is a stub).
    pub meta_tags: HashMap<String, String>,
    /// Rough word count: whitespace-separated tokens of the raw HTML that
    /// do not start with '<'.
    pub word_count: u32,
    /// Round-trip fetch time in milliseconds.
    pub load_time: u64,
}
67
/// Description of an HTML `<form>` element.
/// NOTE(review): not yet populated anywhere — `extract_forms` is a stub.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormInfo {
    /// `action` attribute of the form, if present.
    pub action: Option<String>,
    /// HTTP method the form submits with.
    pub method: String,
    /// Input fields declared inside the form.
    pub fields: Vec<FormField>,
}
75
/// A single input field inside an HTML form.
/// NOTE(review): not yet populated anywhere — `extract_forms` is a stub.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormField {
    /// `name` attribute, if present.
    pub name: Option<String>,
    /// Input type (e.g. "text", "password").
    pub field_type: String,
    /// Whether the field carries the `required` attribute.
    pub required: bool,
    /// `placeholder` attribute, if present.
    pub placeholder: Option<String>,
}
84
/// Structural breakdown of a URL, produced by `analyze_url`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UrlAnalysis {
    /// The URL string exactly as given by the caller.
    pub url: String,
    /// Whether the string parsed as a URL; when false all other
    /// components are empty/None.
    pub is_valid: bool,
    /// URL scheme (e.g. "https").
    pub scheme: Option<String>,
    /// Host component, if any.
    pub host: Option<String>,
    /// Explicit port, if one appears in the URL.
    pub port: Option<u16>,
    /// Path component.
    pub path: String,
    /// Query string (without the leading '?'), if present.
    pub query: Option<String>,
    /// Fragment (without the leading '#'), if present.
    pub fragment: Option<String>,
    /// Derived information about the host/domain.
    pub domain_info: DomainInfo,
}
98
/// Coarse host-name breakdown derived by simple '.'-splitting.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DomainInfo {
    /// Full host string (a name or an IP address).
    pub domain: String,
    /// First '.'-separated label, when the host has more than two labels.
    pub subdomain: Option<String>,
    /// Last '.'-separated label. Naive: multi-part TLDs such as "co.uk"
    /// are not recognized.
    pub tld: Option<String>,
    /// True when the host parses as an IP address.
    pub is_ip: bool,
}
107
108impl WebToolsServer {
109 pub async fn new(config: ServerConfig) -> Result<Self> {
110 let base = BaseServer::new(config).await?;
111
112 let client = Client::builder()
114 .timeout(Duration::from_secs(30))
115 .user_agent("MCP-Tools/1.0")
116 .build()
117 .map_err(|e| McpToolsError::Server(format!("Failed to create HTTP client: {}", e)))?;
118
119 Ok(Self { base, client })
120 }
121
122 async fn http_request(&self, request: HttpRequest) -> Result<HttpResponse> {
124 debug!("Making HTTP request to: {}", request.url);
125
126 let url = Url::parse(&request.url)
128 .map_err(|e| McpToolsError::Server(format!("Invalid URL: {}", e)))?;
129
130 let method = match request.method.to_uppercase().as_str() {
132 "GET" => Method::GET,
133 "POST" => Method::POST,
134 "PUT" => Method::PUT,
135 "DELETE" => Method::DELETE,
136 "HEAD" => Method::HEAD,
137 "PATCH" => Method::PATCH,
138 _ => {
139 return Err(McpToolsError::Server(format!(
140 "Unsupported HTTP method: {}",
141 request.method
142 )))
143 }
144 };
145
146 let mut req_builder = self.client.request(method, url);
148
149 for (key, value) in request.headers {
151 req_builder = req_builder.header(&key, &value);
152 }
153
154 if let Some(body) = request.body {
156 req_builder = req_builder.body(body);
157 }
158
159 if let Some(timeout_secs) = request.timeout {
161 req_builder = req_builder.timeout(Duration::from_secs(timeout_secs));
162 }
163
164 let start_time = std::time::Instant::now();
166 let response = req_builder
167 .send()
168 .await
169 .map_err(|e| McpToolsError::Server(format!("HTTP request failed: {}", e)))?;
170
171 let status = response.status().as_u16();
173 let status_text = response
174 .status()
175 .canonical_reason()
176 .unwrap_or("Unknown")
177 .to_string();
178 let final_url = response.url().to_string();
179
180 let mut headers = HashMap::new();
182 for (key, value) in response.headers() {
183 if let Ok(value_str) = value.to_str() {
184 headers.insert(key.to_string(), value_str.to_string());
185 }
186 }
187
188 let content_type = response
189 .headers()
190 .get("content-type")
191 .and_then(|v| v.to_str().ok())
192 .map(|s| s.to_string());
193
194 let content_length = response.content_length();
195
196 let body = response
198 .text()
199 .await
200 .map_err(|e| McpToolsError::Server(format!("Failed to read response body: {}", e)))?;
201
202 Ok(HttpResponse {
203 status,
204 status_text,
205 headers,
206 body,
207 url: final_url,
208 content_type,
209 content_length,
210 })
211 }
212
213 async fn analyze_webpage(&self, url: &str) -> Result<WebPageAnalysis> {
215 debug!("Analyzing webpage: {}", url);
216
217 let request = HttpRequest {
218 url: url.to_string(),
219 method: "GET".to_string(),
220 headers: HashMap::new(),
221 body: None,
222 timeout: Some(30),
223 follow_redirects: true,
224 };
225
226 let start_time = std::time::Instant::now();
227 let response = self.http_request(request).await?;
228 let load_time = start_time.elapsed().as_millis() as u64;
229
230 let html = &response.body;
232
233 let title = self.extract_html_tag(html, "title");
235
236 let description = self.extract_meta_content(html, "description");
238
239 let keywords_str = self
241 .extract_meta_content(html, "keywords")
242 .unwrap_or_default();
243 let keywords: Vec<String> = keywords_str
244 .split(',')
245 .map(|s| s.trim().to_string())
246 .filter(|s| !s.is_empty())
247 .collect();
248
249 let links = self.extract_links(html);
251
252 let images = self.extract_images(html);
254
255 let forms = self.extract_forms(html);
257
258 let meta_tags = self.extract_meta_tags(html);
260
261 let word_count = html
263 .split_whitespace()
264 .filter(|word| !word.starts_with('<'))
265 .count() as u32;
266
267 Ok(WebPageAnalysis {
268 url: response.url,
269 title,
270 description,
271 keywords,
272 links,
273 images,
274 forms,
275 meta_tags,
276 word_count,
277 load_time,
278 })
279 }
280
281 async fn analyze_url(&self, url_str: &str) -> Result<UrlAnalysis> {
283 debug!("Analyzing URL: {}", url_str);
284
285 match Url::parse(url_str) {
286 Ok(url) => {
287 let domain = url.host_str().unwrap_or("").to_string();
288 let domain_parts: Vec<&str> = domain.split('.').collect();
289
290 let (subdomain, tld) = if domain_parts.len() > 2 {
291 (
292 Some(domain_parts[0].to_string()),
293 Some(domain_parts.last().unwrap().to_string()),
294 )
295 } else {
296 (None, domain_parts.last().map(|s| s.to_string()))
297 };
298
299 let is_ip = domain.parse::<std::net::IpAddr>().is_ok();
300
301 Ok(UrlAnalysis {
302 url: url_str.to_string(),
303 is_valid: true,
304 scheme: Some(url.scheme().to_string()),
305 host: url.host_str().map(|s| s.to_string()),
306 port: url.port(),
307 path: url.path().to_string(),
308 query: url.query().map(|s| s.to_string()),
309 fragment: url.fragment().map(|s| s.to_string()),
310 domain_info: DomainInfo {
311 domain,
312 subdomain,
313 tld,
314 is_ip,
315 },
316 })
317 }
318 Err(_) => Ok(UrlAnalysis {
319 url: url_str.to_string(),
320 is_valid: false,
321 scheme: None,
322 host: None,
323 port: None,
324 path: String::new(),
325 query: None,
326 fragment: None,
327 domain_info: DomainInfo {
328 domain: String::new(),
329 subdomain: None,
330 tld: None,
331 is_ip: false,
332 },
333 }),
334 }
335 }
336
337 fn extract_html_tag(&self, html: &str, tag: &str) -> Option<String> {
339 let start_tag = format!("<{}>", tag);
340 let end_tag = format!("</{}>", tag);
341
342 if let Some(start) = html.find(&start_tag) {
343 if let Some(end) = html[start..].find(&end_tag) {
344 let content = &html[start + start_tag.len()..start + end];
345 return Some(content.trim().to_string());
346 }
347 }
348 None
349 }
350
351 fn extract_meta_content(&self, html: &str, name: &str) -> Option<String> {
352 let pattern = format!(r#"<meta[^>]*name="{}"[^>]*content="([^"]*)"#, name);
353 if let Some(start) = html.find(&format!(r#"name="{}""#, name)) {
355 if let Some(content_start) = html[start..].find(r#"content=""#) {
356 let content_pos = start + content_start + 9; if let Some(content_end) = html[content_pos..].find('"') {
358 return Some(html[content_pos..content_pos + content_end].to_string());
359 }
360 }
361 }
362 None
363 }
364
365 fn extract_links(&self, html: &str) -> Vec<String> {
366 let mut links = Vec::new();
367 let mut pos = 0;
368
369 while let Some(href_pos) = html[pos..].find("href=\"") {
370 let start = pos + href_pos + 6; if let Some(end_pos) = html[start..].find('"') {
372 let link = html[start..start + end_pos].to_string();
373 if !link.is_empty() && !link.starts_with('#') {
374 links.push(link);
375 }
376 pos = start + end_pos;
377 } else {
378 break;
379 }
380 }
381
382 links
383 }
384
385 fn extract_images(&self, html: &str) -> Vec<String> {
386 let mut images = Vec::new();
387 let mut pos = 0;
388
389 while let Some(src_pos) = html[pos..].find("src=\"") {
390 let start = pos + src_pos + 5; if let Some(end_pos) = html[start..].find('"') {
392 let image = html[start..start + end_pos].to_string();
393 if !image.is_empty() {
394 images.push(image);
395 }
396 pos = start + end_pos;
397 } else {
398 break;
399 }
400 }
401
402 images
403 }
404
    /// Placeholder for `<form>` extraction; always returns an empty list.
    /// TODO: parse form actions, methods, and fields into `FormInfo`.
    fn extract_forms(&self, _html: &str) -> Vec<FormInfo> {
        Vec::new()
    }
409
    /// Placeholder for meta-tag extraction; always returns an empty map.
    /// TODO: collect name/content pairs from `<meta>` elements.
    fn extract_meta_tags(&self, _html: &str) -> HashMap<String, String> {
        HashMap::new()
    }
414}
415
416#[async_trait]
417impl McpServerBase for WebToolsServer {
    /// Report this server's capabilities.
    ///
    /// Starts from the base server's capability set, then REPLACES its tool
    /// list with the four web tools defined here (`http_request`,
    /// `analyze_webpage`, `analyze_url`, `fetch_content`).
    /// NOTE(review): any tools the base advertised are overwritten, not
    /// extended — confirm the replacement is intentional.
    async fn get_capabilities(&self) -> Result<ServerCapabilities> {
        let mut capabilities = self.base.get_capabilities().await?;

        let web_tools = vec![
            // Generic HTTP client: arbitrary method, headers, body, timeout.
            McpTool {
                name: "http_request".to_string(),
                description: "Make HTTP requests (GET, POST, PUT, DELETE) to web endpoints"
                    .to_string(),
                input_schema: serde_json::json!({
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "Target URL for the HTTP request"
                        },
                        "method": {
                            "type": "string",
                            "description": "HTTP method (GET, POST, PUT, DELETE, HEAD, PATCH)",
                            "enum": ["GET", "POST", "PUT", "DELETE", "HEAD", "PATCH"],
                            "default": "GET"
                        },
                        "headers": {
                            "type": "object",
                            "description": "HTTP headers as key-value pairs",
                            "additionalProperties": {"type": "string"}
                        },
                        "body": {
                            "type": "string",
                            "description": "Request body (for POST, PUT, PATCH methods)"
                        },
                        "timeout": {
                            "type": "integer",
                            "description": "Request timeout in seconds (default: 30)",
                            "minimum": 1,
                            "maximum": 300
                        },
                        "follow_redirects": {
                            "type": "boolean",
                            "description": "Whether to follow HTTP redirects (default: true)"
                        }
                    },
                    "required": ["url"]
                }),
                category: "web".to_string(),
                requires_permission: true,
                permissions: vec!["network.http".to_string()],
            },
            // Page metadata/link/image extraction (see analyze_webpage).
            McpTool {
                name: "analyze_webpage".to_string(),
                description:
                    "Analyze a web page and extract metadata, links, images, and other information"
                        .to_string(),
                input_schema: serde_json::json!({
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "URL of the web page to analyze"
                        }
                    },
                    "required": ["url"]
                }),
                category: "web".to_string(),
                requires_permission: true,
                permissions: vec!["network.http".to_string()],
            },
            // Pure URL parsing — no network access, so no permission needed.
            McpTool {
                name: "analyze_url".to_string(),
                description:
                    "Analyze URL structure and extract components (scheme, host, path, query, etc.)"
                        .to_string(),
                input_schema: serde_json::json!({
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "URL to analyze"
                        }
                    },
                    "required": ["url"]
                }),
                category: "web".to_string(),
                requires_permission: false,
                permissions: vec![],
            },
            // Simple GET with a body preview in the response text.
            McpTool {
                name: "fetch_content".to_string(),
                description: "Fetch content from a URL with automatic content type detection"
                    .to_string(),
                input_schema: serde_json::json!({
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "URL to fetch content from"
                        },
                        "headers": {
                            "type": "object",
                            "description": "Additional HTTP headers",
                            "additionalProperties": {"type": "string"}
                        },
                        "timeout": {
                            "type": "integer",
                            "description": "Request timeout in seconds (default: 30)"
                        }
                    },
                    "required": ["url"]
                }),
                category: "web".to_string(),
                requires_permission: true,
                permissions: vec!["network.http".to_string()],
            },
        ];

        capabilities.tools = web_tools;
        Ok(capabilities)
    }
536
537 async fn handle_tool_request(&self, request: McpToolRequest) -> Result<McpToolResponse> {
538 info!("Handling Web Tools request: {}", request.tool);
539
540 match request.tool.as_str() {
541 "http_request" => {
542 debug!("Making HTTP request");
543
544 let url = request
545 .arguments
546 .get("url")
547 .and_then(|v| v.as_str())
548 .ok_or_else(|| McpToolsError::Server("Missing 'url' parameter".to_string()))?;
549
550 let method = request
551 .arguments
552 .get("method")
553 .and_then(|v| v.as_str())
554 .unwrap_or("GET");
555
556 let headers = request
557 .arguments
558 .get("headers")
559 .and_then(|v| v.as_object())
560 .map(|obj| {
561 obj.iter()
562 .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
563 .collect()
564 })
565 .unwrap_or_default();
566
567 let body = request
568 .arguments
569 .get("body")
570 .and_then(|v| v.as_str())
571 .map(|s| s.to_string());
572
573 let timeout = request.arguments.get("timeout").and_then(|v| v.as_u64());
574
575 let follow_redirects = request
576 .arguments
577 .get("follow_redirects")
578 .and_then(|v| v.as_bool())
579 .unwrap_or(true);
580
581 let http_request = HttpRequest {
582 url: url.to_string(),
583 method: method.to_string(),
584 headers,
585 body,
586 timeout,
587 follow_redirects,
588 };
589
590 let response = self.http_request(http_request).await?;
591
592 let content_text = format!(
593 "HTTP Request Complete\n\
594 Status: {} {}\n\
595 URL: {}\n\
596 Content-Type: {}\n\
597 Content-Length: {} bytes",
598 response.status,
599 response.status_text,
600 response.url,
601 response.content_type.as_deref().unwrap_or("unknown"),
602 response
603 .content_length
604 .unwrap_or(response.body.len() as u64)
605 );
606
607 let mut metadata = HashMap::new();
608 metadata.insert("http_response".to_string(), serde_json::to_value(response)?);
609
610 Ok(McpToolResponse {
611 id: request.id,
612 content: vec![McpContent::text(content_text)],
613 is_error: false,
614 error: None,
615 metadata,
616 })
617 }
618 "analyze_webpage" => {
619 debug!("Analyzing webpage");
620
621 let url = request
622 .arguments
623 .get("url")
624 .and_then(|v| v.as_str())
625 .ok_or_else(|| McpToolsError::Server("Missing 'url' parameter".to_string()))?;
626
627 let analysis = self.analyze_webpage(url).await?;
628
629 let content_text = format!(
630 "Web Page Analysis Complete\n\
631 URL: {}\n\
632 Title: {}\n\
633 Description: {}\n\
634 Links Found: {}\n\
635 Images Found: {}\n\
636 Word Count: {}\n\
637 Load Time: {}ms",
638 analysis.url,
639 analysis.title.as_deref().unwrap_or("None"),
640 analysis.description.as_deref().unwrap_or("None"),
641 analysis.links.len(),
642 analysis.images.len(),
643 analysis.word_count,
644 analysis.load_time
645 );
646
647 let mut metadata = HashMap::new();
648 metadata.insert(
649 "webpage_analysis".to_string(),
650 serde_json::to_value(analysis)?,
651 );
652
653 Ok(McpToolResponse {
654 id: request.id,
655 content: vec![McpContent::text(content_text)],
656 is_error: false,
657 error: None,
658 metadata,
659 })
660 }
661 "analyze_url" => {
662 debug!("Analyzing URL structure");
663
664 let url = request
665 .arguments
666 .get("url")
667 .and_then(|v| v.as_str())
668 .ok_or_else(|| McpToolsError::Server("Missing 'url' parameter".to_string()))?;
669
670 let analysis = self.analyze_url(url).await?;
671
672 let content_text = format!(
673 "URL Analysis Complete\n\
674 URL: {}\n\
675 Valid: {}\n\
676 Scheme: {}\n\
677 Host: {}\n\
678 Port: {}\n\
679 Path: {}\n\
680 Domain: {}",
681 analysis.url,
682 analysis.is_valid,
683 analysis.scheme.as_deref().unwrap_or("None"),
684 analysis.host.as_deref().unwrap_or("None"),
685 analysis
686 .port
687 .map(|p| p.to_string())
688 .as_deref()
689 .unwrap_or("None"),
690 analysis.path,
691 analysis.domain_info.domain
692 );
693
694 let mut metadata = HashMap::new();
695 metadata.insert("url_analysis".to_string(), serde_json::to_value(analysis)?);
696
697 Ok(McpToolResponse {
698 id: request.id,
699 content: vec![McpContent::text(content_text)],
700 is_error: false,
701 error: None,
702 metadata,
703 })
704 }
705 "fetch_content" => {
706 debug!("Fetching content from URL");
707
708 let url = request
709 .arguments
710 .get("url")
711 .and_then(|v| v.as_str())
712 .ok_or_else(|| McpToolsError::Server("Missing 'url' parameter".to_string()))?;
713
714 let headers = request
715 .arguments
716 .get("headers")
717 .and_then(|v| v.as_object())
718 .map(|obj| {
719 obj.iter()
720 .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
721 .collect()
722 })
723 .unwrap_or_default();
724
725 let timeout = request.arguments.get("timeout").and_then(|v| v.as_u64());
726
727 let http_request = HttpRequest {
728 url: url.to_string(),
729 method: "GET".to_string(),
730 headers,
731 body: None,
732 timeout,
733 follow_redirects: true,
734 };
735
736 let response = self.http_request(http_request).await?;
737
738 let content_text = format!(
739 "Content Fetched Successfully\n\
740 URL: {}\n\
741 Status: {}\n\
742 Content-Type: {}\n\
743 Size: {} bytes\n\n{}",
744 response.url,
745 response.status,
746 response.content_type.as_deref().unwrap_or("unknown"),
747 response.body.len(),
748 if response.body.len() > 1000 {
749 format!("{}...", &response.body[..1000])
750 } else {
751 response.body.clone()
752 }
753 );
754
755 let mut metadata = HashMap::new();
756 metadata.insert(
757 "fetched_content".to_string(),
758 serde_json::to_value(response)?,
759 );
760
761 Ok(McpToolResponse {
762 id: request.id,
763 content: vec![McpContent::text(content_text)],
764 is_error: false,
765 error: None,
766 metadata,
767 })
768 }
769 _ => {
770 warn!("Unknown Web Tools request: {}", request.tool);
771 Err(McpToolsError::Server(format!(
772 "Unknown Web Tools request: {}",
773 request.tool
774 )))
775 }
776 }
777 }
778
    /// Delegate statistics reporting to the shared base server.
    async fn get_stats(&self) -> Result<crate::common::ServerStats> {
        self.base.get_stats().await
    }
782
    /// Start-up hook. Logs only — all real setup happens in `new`.
    async fn initialize(&mut self) -> Result<()> {
        info!("Initializing Web Tools MCP Server");
        Ok(())
    }
787
    /// Shutdown hook. Logs only — no resources need explicit teardown here.
    async fn shutdown(&mut self) -> Result<()> {
        info!("Shutting down Web Tools MCP Server");
        Ok(())
    }
792}