use crate::browser::{BrowserController, CaptureFormat, CaptureOptions, PageCapture};
use crate::error::Result;
use crate::extraction::{ContentExtractor, LinkExtractor, MetadataExtractor};
use crate::mcp::types::{McpToolDefinition, ToolCallResult, ToolContent};
use crate::research::{ResearchConfig, SourceTier, TierClassifier, TriangulationEngine};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::net::IpAddr;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{error, info, instrument, warn};

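/// Checks whether a URL is safe to fetch with respect to SSRF
/// (server-side request forgery).
///
/// Returns `Ok(true)` when the URL is publicly routable, `Ok(false)` when it
/// targets a blocked scheme, localhost, a private/reserved IP, an internal
/// domain suffix, or a cloud metadata endpoint, and `Err` when the URL fails
/// to parse.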
fn is_url_safe(url: &str) -> std::result::Result<bool, String> {
    let parsed = url::Url::parse(url).map_err(|e| format!("Invalid URL: {}", e))?;

    // Only plain HTTP(S) is allowed; file:, ftp:, gopher:, etc. are rejected.
    match parsed.scheme() {
        "http" | "https" => {}
        scheme => {
            warn!(scheme = %scheme, "SSRF: Blocked scheme");
            return Ok(false);
        }
    }

    let host = match parsed.host_str() {
        Some(h) => h,
        None => return Ok(false),
    };

    // Block common spellings of localhost.
    let localhost_variants = ["localhost", "127.0.0.1", "::1", "[::1]", "0.0.0.0", "0"];
    if localhost_variants
        .iter()
        .any(|&l| host.eq_ignore_ascii_case(l))
    {
        warn!(host = %host, "SSRF: Blocked localhost");
        return Ok(false);
    }

    // If the host is a literal IP, it must be publicly routable.
    if let Ok(ip) = host.parse::<IpAddr>() {
        if !is_public_ip(&ip) {
            warn!(ip = %ip, "SSRF: Blocked private/reserved IP");
            return Ok(false);
        }
    }

    // Block domain suffixes that conventionally resolve inside private networks.
    let blocked_suffixes = [
        ".internal",
        ".local",
        ".localhost",
        ".lan",
        ".corp",
        ".home",
    ];
    let host_lower = host.to_lowercase();
    if blocked_suffixes.iter().any(|&s| host_lower.ends_with(s)) {
        warn!(host = %host, "SSRF: Blocked internal domain");
        return Ok(false);
    }

    // Block well-known cloud metadata endpoints.
    let blocked_hosts = [
        "169.254.169.254",
        "metadata.google.internal",
        "metadata",
    ];
    if blocked_hosts.iter().any(|&h| host.eq_ignore_ascii_case(h)) {
        warn!(host = %host, "SSRF: Blocked cloud metadata endpoint");
        return Ok(false);
    }

    Ok(true)
}

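/// Returns `true` if `ip` is publicly routable. For IPv4 this filters
/// loopback, private, link-local, broadcast, documentation, and unspecified
/// addresses, plus the CGNAT range (100.64.0.0/10) and 192.0.0.0/24; for
/// IPv6, loopback, unspecified, link-local (fe80::/10), and unique local
/// (fc00::/7) addresses.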
fn is_public_ip(ip: &IpAddr) -> bool {
    match ip {
        IpAddr::V4(ipv4) => {
            !ipv4.is_private()
                && !ipv4.is_loopback()
                && !ipv4.is_link_local()
                && !ipv4.is_broadcast()
                && !ipv4.is_documentation()
                && !ipv4.is_unspecified()
                // 100.64.0.0/10: carrier-grade NAT shared address space (RFC 6598)
                && !(ipv4.octets()[0] == 100 && (64..=127).contains(&ipv4.octets()[1]))
                // 192.0.0.0/24: IETF protocol assignments (RFC 6890)
                && !(ipv4.octets()[0] == 192 && ipv4.octets()[1] == 0 && ipv4.octets()[2] == 0)
        }
        IpAddr::V6(ipv6) => {
            !ipv6.is_loopback()
                && !ipv6.is_unspecified()
                // fe80::/10: link-local
                && (ipv6.segments()[0] & 0xffc0) != 0xfe80
                // fc00::/7: unique local addresses
                && (ipv6.segments()[0] & 0xfe00) != 0xfc00
        }
    }
}

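/// Wraps [`is_url_safe`] for tool handlers: returns `None` when the URL may
/// be fetched, or `Some(ToolCallResult)` carrying the error to return.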
fn validate_url_ssrf(url: &str) -> Option<ToolCallResult> {
    match is_url_safe(url) {
        Ok(true) => None,
        Ok(false) => Some(ToolCallResult::error(format!(
            "SSRF protection: URL '{}' is not allowed (private IP, localhost, or blocked endpoint)",
            url
        ))),
        Err(e) => Some(ToolCallResult::error(format!("Invalid URL: {}", e))),
    }
}

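/// A tool exposed over MCP: a name, a human-readable description, and a JSON
/// Schema describing its input.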
pub trait McpTool: Send + Sync {
    /// Unique tool name, e.g. "web_navigate".
    fn name(&self) -> &str;

    /// One-line description shown to MCP clients.
    fn description(&self) -> &str;

    /// JSON Schema for the tool's arguments.
    fn input_schema(&self) -> Value;

    /// Assembles the full MCP definition from the methods above.
    fn definition(&self) -> McpToolDefinition {
        McpToolDefinition {
            name: self.name().to_string(),
            description: self.description().to_string(),
            input_schema: self.input_schema(),
        }
    }
}

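/// Registry that owns all tool implementations and dispatches calls to them.
///
/// Illustrative usage (sketch; assumes an async context and crate-local
/// imports):
///
/// ```ignore
/// let registry = ToolRegistry::new();
/// let result = registry
///     .execute("check_source_quality", serde_json::json!({ "url": "https://example.com" }))
///     .await;
/// ```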
pub struct ToolRegistry {
    tools: HashMap<String, Box<dyn McpTool>>,
    /// Cached browser handle; currently unused (see `get_or_create_browser`).
    #[allow(dead_code)]
    browser: Arc<RwLock<Option<BrowserController>>>,
}

impl ToolRegistry {
    pub fn new() -> Self {
        let mut registry = Self {
            tools: HashMap::new(),
            browser: Arc::new(RwLock::new(None)),
        };

        // Browser-backed web tools.
        registry.register(Box::new(WebNavigateTool));
        registry.register(Box::new(WebScreenshotTool));
        registry.register(Box::new(WebPdfTool));
        registry.register(Box::new(WebExtractContentTool));
        registry.register(Box::new(WebExtractLinksTool));
        registry.register(Box::new(WebExtractMetadataTool));
        registry.register(Box::new(WebExecuteJsTool));
        registry.register(Box::new(WebCaptureMhtmlTool));

        // Research / triangulation tools.
        registry.register(Box::new(TriangulateSourcesTool));
        registry.register(Box::new(VerifyClaimTool));
        registry.register(Box::new(CheckSourceQualityTool));

        registry
    }

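    /// Registers a tool under its `name()`, replacing any previous entry.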
    pub fn register(&mut self, tool: Box<dyn McpTool>) {
        self.tools.insert(tool.name().to_string(), tool);
    }

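    /// Returns the MCP definitions for every registered tool.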
    pub fn definitions(&self) -> Vec<McpToolDefinition> {
        self.tools.values().map(|t| t.definition()).collect()
    }

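    /// Executes the named tool with the given JSON arguments. Unknown tools
    /// and browser startup failures are reported as error results.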
    #[instrument(skip(self, args))]
    pub async fn execute(&self, name: &str, args: Value) -> ToolCallResult {
        info!("Executing tool: {}", name);

        if !self.tools.contains_key(name) {
            return ToolCallResult::error(format!("Tool not found: {}", name));
        }

        // Note: a browser is started for every call, including the research
        // tools below that never touch it.
        let browser = self.get_or_create_browser().await;
        let browser = match browser {
            Ok(b) => b,
            Err(e) => return ToolCallResult::error(format!("Failed to create browser: {}", e)),
        };

        match name {
            "web_navigate" => self.execute_navigate(&browser, args).await,
            "web_screenshot" => self.execute_screenshot(&browser, args).await,
            "web_pdf" => self.execute_pdf(&browser, args).await,
            "web_extract_content" => self.execute_extract_content(&browser, args).await,
            "web_extract_links" => self.execute_extract_links(&browser, args).await,
            "web_extract_metadata" => self.execute_extract_metadata(&browser, args).await,
            "web_execute_js" => self.execute_js(&browser, args).await,
            "web_capture_mhtml" => self.execute_capture_mhtml(&browser, args).await,
            "triangulate_sources" => self.execute_triangulate_sources(args).await,
            "verify_claim" => self.execute_verify_claim(args).await,
            "check_source_quality" => self.execute_check_source_quality(args).await,
            _ => ToolCallResult::error(format!("Unknown tool: {}", name)),
        }
    }

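    /// Despite the name, this currently creates a fresh `BrowserController`
    /// per call; the cached `browser` field is not consulted.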
    async fn get_or_create_browser(&self) -> Result<BrowserController> {
        BrowserController::new().await
    }

    async fn execute_navigate(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        // Note: the `waitFor` parameter advertised in the schema is not yet
        // handled here.
        match browser.navigate(url).await {
            Ok(page) => {
                let current_url = page.url().await;
                ToolCallResult::text(format!("Successfully navigated to: {}", current_url))
            }
            Err(e) => {
                error!("Navigation failed: {}", e);
                ToolCallResult::error(format!("Navigation failed: {}", e))
            }
        }
    }

    async fn execute_screenshot(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        let full_page = args
            .get("fullPage")
            .and_then(|v| v.as_bool())
            .unwrap_or(true);
        let format_str = args.get("format").and_then(|v| v.as_str()).unwrap_or("png");

        let format = match format_str {
            "jpeg" | "jpg" => CaptureFormat::Jpeg,
            "webp" => CaptureFormat::Webp,
            _ => CaptureFormat::Png,
        };

        // Note: the `selector` parameter advertised in the schema is not yet
        // handled here; the whole page (or viewport) is captured.
        match browser.navigate(url).await {
            Ok(page) => {
                let options = CaptureOptions {
                    format,
                    full_page,
                    as_base64: true,
                    ..Default::default()
                };

                match PageCapture::capture(&page, &options).await {
                    Ok(result) => {
                        let base64 = result.base64.clone().unwrap_or_else(|| result.to_base64());
                        ToolCallResult::image(base64, result.mime_type())
                    }
                    Err(e) => ToolCallResult::error(format!("Screenshot failed: {}", e)),
                }
            }
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    async fn execute_pdf(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        match browser.navigate(url).await {
            Ok(page) => {
                // Note: the `printBackground` parameter from the schema is not
                // wired up; the default PDF options are used.
                let options = CaptureOptions::pdf();

                match PageCapture::capture(&page, &options).await {
                    Ok(result) => {
                        let base64 = result.to_base64();
                        ToolCallResult::multi(vec![
                            ToolContent::text(format!("PDF generated: {} bytes", result.size)),
                            ToolContent::Resource {
                                uri: format!("pdf://{}", url),
                                resource: crate::mcp::types::ResourceContent {
                                    mime_type: "application/pdf".to_string(),
                                    text: None,
                                    blob: Some(base64),
                                },
                            },
                        ])
                    }
                    Err(e) => ToolCallResult::error(format!("PDF generation failed: {}", e)),
                }
            }
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    async fn execute_extract_content(
        &self,
        browser: &BrowserController,
        args: Value,
    ) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        let selector = args.get("selector").and_then(|v| v.as_str());
        let format = args
            .get("format")
            .and_then(|v| v.as_str())
            .unwrap_or("markdown");

        match browser.navigate(url).await {
            Ok(page) => {
                let content = if let Some(sel) = selector {
                    ContentExtractor::extract_from_selector(&page, sel).await
                } else {
                    ContentExtractor::extract_main_content(&page).await
                };

                match content {
                    Ok(c) => {
                        let output = match format {
                            "text" => c.text,
                            "html" => c.html,
                            _ => c.markdown.unwrap_or(c.text),
                        };
                        ToolCallResult::text(output)
                    }
                    Err(e) => ToolCallResult::error(format!("Content extraction failed: {}", e)),
                }
            }
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    async fn execute_extract_links(
        &self,
        browser: &BrowserController,
        args: Value,
    ) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        let link_type = args.get("type").and_then(|v| v.as_str());
        let selector = args.get("selector").and_then(|v| v.as_str());

        match browser.navigate(url).await {
            Ok(page) => {
                let links = if let Some(sel) = selector {
                    LinkExtractor::extract_from_selector(&page, sel).await
                } else {
                    match link_type {
                        Some("internal") => LinkExtractor::extract_internal(&page).await,
                        Some("external") => LinkExtractor::extract_external(&page).await,
                        _ => LinkExtractor::extract_all(&page).await,
                    }
                };

                match links {
                    Ok(links) => {
                        let json = serde_json::to_string_pretty(&links)
                            .unwrap_or_else(|_| "[]".to_string());
                        ToolCallResult::text(json)
                    }
                    Err(e) => ToolCallResult::error(format!("Link extraction failed: {}", e)),
                }
            }
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    async fn execute_extract_metadata(
        &self,
        browser: &BrowserController,
        args: Value,
    ) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        match browser.navigate(url).await {
            Ok(page) => match MetadataExtractor::extract(&page).await {
                Ok(meta) => {
                    let json =
                        serde_json::to_string_pretty(&meta).unwrap_or_else(|_| "{}".to_string());
                    ToolCallResult::text(json)
                }
                Err(e) => ToolCallResult::error(format!("Metadata extraction failed: {}", e)),
            },
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    async fn execute_js(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        let script = match args.get("script").and_then(|v| v.as_str()) {
            Some(s) => s,
            None => return ToolCallResult::error("Missing required parameter: script"),
        };

        match browser.navigate(url).await {
            Ok(page) => match page.page.evaluate(script).await {
                Ok(result) => {
                    let value: Value = result.into_value().unwrap_or(Value::Null);
                    let output =
                        serde_json::to_string_pretty(&value).unwrap_or_else(|_| "null".to_string());
                    ToolCallResult::text(output)
                }
                Err(e) => ToolCallResult::error(format!("JavaScript execution failed: {}", e)),
            },
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    async fn execute_capture_mhtml(
        &self,
        browser: &BrowserController,
        args: Value,
    ) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        match browser.navigate(url).await {
            Ok(page) => match PageCapture::mhtml(&page).await {
                Ok(result) => {
                    let base64 = result.to_base64();
                    ToolCallResult::multi(vec![
                        ToolContent::text(format!("MHTML captured: {} bytes", result.size)),
                        ToolContent::Resource {
                            uri: format!("mhtml://{}", url),
                            resource: crate::mcp::types::ResourceContent {
                                mime_type: "multipart/related".to_string(),
                                text: None,
                                blob: Some(base64),
                            },
                        },
                    ])
                }
                Err(e) => ToolCallResult::error(format!("MHTML capture failed: {}", e)),
            },
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

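    /// Checks whether a set of source URLs satisfies the CONS-006
    /// triangulation requirement (3+ independent sources) and reports each
    /// source's quality tier.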
    #[instrument(skip(self, args))]
    async fn execute_triangulate_sources(&self, args: Value) -> ToolCallResult {
        let urls: Vec<String> = match args.get("urls") {
            Some(Value::Array(arr)) => arr
                .iter()
                .filter_map(|v| v.as_str().map(|s| s.to_string()))
                .collect(),
            _ => {
                return ToolCallResult::error("Missing required parameter: urls (array of strings)")
            }
        };

        if urls.is_empty() {
            return ToolCallResult::error("urls array cannot be empty");
        }

        // Apply SSRF validation to every URL before classification.
        for url in &urls {
            if let Some(err) = validate_url_ssrf(url) {
                return err;
            }
        }

        let config = ResearchConfig::default();
        let engine = TriangulationEngine::new(config);

        let (meets_requirement, message) = engine.quick_verify(&urls);

        // Classify each source into a quality tier for the report.
        let mut source_details: Vec<Value> = Vec::new();
        for url in &urls {
            let quality = engine.check_source(url);
            source_details.push(json!({
                "url": url,
                "tier": format!("{:?}", quality.tier),
                "tier_weight": quality.tier.weight(),
                "domain": quality.domain,
                "confidence": quality.confidence,
                "reasons": quality.reasons,
            }));
        }

        let result = json!({
            "meets_triangulation": meets_requirement,
            "message": message,
            "min_sources_required": 3,
            "sources_provided": urls.len(),
            "source_details": source_details,
            "recommendation": if meets_requirement {
                "Sources meet triangulation requirements. Proceed with verification."
            } else {
                "Add more high-quality sources (Tier 1 or Tier 2) to meet triangulation requirements."
            }
        });

        ToolCallResult::text(
            serde_json::to_string_pretty(&result).unwrap_or_else(|_| "{}".to_string()),
        )
    }

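    /// Verifies a claim against the provided source URLs (with optional
    /// per-source content snippets) and returns a consensus report.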
    #[instrument(skip(self, args))]
    async fn execute_verify_claim(&self, args: Value) -> ToolCallResult {
        let query = match args.get("query").and_then(|v| v.as_str()) {
            Some(q) => q.to_string(),
            None => return ToolCallResult::error("Missing required parameter: query"),
        };

        let urls: Vec<String> = match args.get("urls") {
            Some(Value::Array(arr)) => arr
                .iter()
                .filter_map(|v| v.as_str().map(|s| s.to_string()))
                .collect(),
            _ => {
                return ToolCallResult::error("Missing required parameter: urls (array of strings)")
            }
        };

        // Optional [url, content_snippet, supports_claim] tuples.
        let contents: Vec<(String, Option<String>, Option<bool>)> = match args.get("contents") {
            Some(Value::Array(arr)) => arr
                .iter()
                .filter_map(|v| {
                    if let Value::Array(item) = v {
                        let url = item.first()?.as_str()?.to_string();
                        let content = item.get(1).and_then(|c| c.as_str()).map(|s| s.to_string());
                        let supports = item.get(2).and_then(|s| s.as_bool());
                        Some((url, content, supports))
                    } else {
                        None
                    }
                })
                .collect(),
            _ => Vec::new(),
        };

        for url in &urls {
            if let Some(err) = validate_url_ssrf(url) {
                return err;
            }
        }

        let preset = args
            .get("preset")
            .and_then(|v| v.as_str())
            .unwrap_or("default");

        let config = match preset {
            "strict" => ResearchConfig::strict(),
            "permissive" => ResearchConfig::permissive(),
            _ => ResearchConfig::default(),
        };

        let engine = TriangulationEngine::new(config);
        let result = engine.research_with_urls(&query, &urls, &contents);

        let response = json!({
            "verification_status": format!("{:?}", result.status),
            "status_description": result.status.description(),
            "is_verified": result.is_verified(),
            "confidence": result.confidence,
            "query": result.query,
            "metrics": {
                "total_sources": result.metrics.total_sources,
                "accessible_sources": result.metrics.accessible_sources,
                "supporting_sources": result.metrics.supporting_sources,
                "refuting_sources": result.metrics.refuting_sources,
                "neutral_sources": result.metrics.neutral_sources,
                "tier1_count": result.metrics.tier1_count,
                "tier2_count": result.metrics.tier2_count,
                "tier3_count": result.metrics.tier3_count,
                "average_confidence": result.metrics.average_confidence,
                "meets_triangulation": result.metrics.meets_triangulation(),
            },
            "sources": result.sources.iter().map(|s| json!({
                "url": s.url,
                "title": s.title,
                "tier": format!("{:?}", s.quality.tier),
                "supports_claim": s.supports_claim,
                "relevance_score": s.relevance_score,
                "content_snippet": s.content_snippet,
                "is_usable": s.is_usable(),
            })).collect::<Vec<_>>(),
            "consensus": {
                "status": format!("{:?}", result.consensus.status),
                "confidence": result.consensus.confidence,
                "consensus_answer": result.consensus.consensus_answer,
                "discrepancy_count": result.consensus.discrepancies.len(),
            },
            "timestamp": result.timestamp.to_rfc3339(),
        });

        ToolCallResult::text(
            serde_json::to_string_pretty(&response).unwrap_or_else(|_| "{}".to_string()),
        )
    }

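    /// Classifies a single URL into a quality tier (Tier1/2/3/Unknown) and
    /// returns the classification alongside a usage recommendation.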
    #[instrument(skip(self, args))]
    async fn execute_check_source_quality(&self, args: Value) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        let classifier = TierClassifier::default();
        let quality = classifier.classify(url);

        let result = json!({
            "url": url,
            "tier": format!("{:?}", quality.tier),
            "tier_description": match quality.tier {
                SourceTier::Tier1 => "Authoritative (official docs, .gov, .edu, peer-reviewed)",
                SourceTier::Tier2 => "Reputable (Wikipedia, major news, Stack Overflow)",
                SourceTier::Tier3 => "Low quality (forums, social media, unknown)",
                SourceTier::Unknown => "Unknown (could not classify)",
            },
            "tier_weight": quality.tier.weight(),
            "domain": quality.domain,
            "base_confidence": quality.confidence,
            "reasons": quality.reasons,
            "is_authoritative": quality.tier == SourceTier::Tier1,
            "is_reputable": matches!(quality.tier, SourceTier::Tier1 | SourceTier::Tier2),
            "recommendation": match quality.tier {
                SourceTier::Tier1 => "Excellent source. High priority for triangulation.",
                SourceTier::Tier2 => "Good source. Acceptable for triangulation.",
                SourceTier::Tier3 => "Use with caution. Seek additional Tier 1/2 sources.",
                SourceTier::Unknown => "Unknown quality. Verify manually before using.",
            }
        });

        ToolCallResult::text(
            serde_json::to_string_pretty(&result).unwrap_or_else(|_| "{}".to_string()),
        )
    }
}

impl Default for ToolRegistry {
    fn default() -> Self {
        Self::new()
    }
}

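/// Navigates to a URL in the headless browser.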
struct WebNavigateTool;

impl McpTool for WebNavigateTool {
    fn name(&self) -> &str {
        "web_navigate"
    }

    fn description(&self) -> &str {
        "Navigate to a URL using a headless browser"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to navigate to"
                },
                "waitFor": {
                    "type": "string",
                    "description": "CSS selector to wait for before returning"
                }
            },
            "required": ["url"]
        })
    }
}

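/// Captures a screenshot of a page as PNG, JPEG, or WebP.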
struct WebScreenshotTool;

impl McpTool for WebScreenshotTool {
    fn name(&self) -> &str {
        "web_screenshot"
    }

    fn description(&self) -> &str {
        "Capture a screenshot of a web page"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to capture"
                },
                "fullPage": {
                    "type": "boolean",
                    "description": "Capture full page (default: true)",
                    "default": true
                },
                "format": {
                    "type": "string",
                    "enum": ["png", "jpeg", "webp"],
                    "description": "Image format (default: png)",
                    "default": "png"
                },
                "selector": {
                    "type": "string",
                    "description": "CSS selector to capture specific element"
                }
            },
            "required": ["url"]
        })
    }
}

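/// Renders a page to PDF.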
struct WebPdfTool;

impl McpTool for WebPdfTool {
    fn name(&self) -> &str {
        "web_pdf"
    }

    fn description(&self) -> &str {
        "Generate a PDF of a web page"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to convert to PDF"
                },
                "printBackground": {
                    "type": "boolean",
                    "description": "Print background graphics (default: true)",
                    "default": true
                }
            },
            "required": ["url"]
        })
    }
}

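/// Extracts the main content of a page as text, markdown, or HTML.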
struct WebExtractContentTool;

impl McpTool for WebExtractContentTool {
    fn name(&self) -> &str {
        "web_extract_content"
    }

    fn description(&self) -> &str {
        "Extract main content from a web page as text or markdown"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to extract content from"
                },
                "selector": {
                    "type": "string",
                    "description": "CSS selector to extract from (default: auto-detect main content)"
                },
                "format": {
                    "type": "string",
                    "enum": ["text", "markdown", "html"],
                    "description": "Output format (default: markdown)",
                    "default": "markdown"
                }
            },
            "required": ["url"]
        })
    }
}

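/// Extracts links (all, internal, or external) from a page.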
struct WebExtractLinksTool;

impl McpTool for WebExtractLinksTool {
    fn name(&self) -> &str {
        "web_extract_links"
    }

    fn description(&self) -> &str {
        "Extract all links from a web page with context"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to extract links from"
                },
                "type": {
                    "type": "string",
                    "enum": ["all", "internal", "external"],
                    "description": "Type of links to extract (default: all)",
                    "default": "all"
                },
                "selector": {
                    "type": "string",
                    "description": "CSS selector to extract links from"
                }
            },
            "required": ["url"]
        })
    }
}

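/// Extracts page metadata such as title, description, and social tags.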
struct WebExtractMetadataTool;

impl McpTool for WebExtractMetadataTool {
    fn name(&self) -> &str {
        "web_extract_metadata"
    }

    fn description(&self) -> &str {
        "Extract page metadata (title, description, Open Graph, Twitter Card, etc.)"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to extract metadata from"
                }
            },
            "required": ["url"]
        })
    }
}

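/// Runs arbitrary JavaScript in the page and returns the JSON result.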
struct WebExecuteJsTool;

impl McpTool for WebExecuteJsTool {
    fn name(&self) -> &str {
        "web_execute_js"
    }

    fn description(&self) -> &str {
        "Execute JavaScript on a web page and return the result"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to execute JavaScript on"
                },
                "script": {
                    "type": "string",
                    "description": "The JavaScript code to execute"
                }
            },
            "required": ["url", "script"]
        })
    }
}

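/// Captures a full page, including subresources, as an MHTML archive.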
struct WebCaptureMhtmlTool;

impl McpTool for WebCaptureMhtmlTool {
    fn name(&self) -> &str {
        "web_capture_mhtml"
    }

    fn description(&self) -> &str {
        "Capture a complete web page as an MHTML archive"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to capture"
                }
            },
            "required": ["url"]
        })
    }
}

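/// Validates that a set of sources satisfies the triangulation requirement.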
struct TriangulateSourcesTool;

impl McpTool for TriangulateSourcesTool {
    fn name(&self) -> &str {
        "triangulate_sources"
    }

    fn description(&self) -> &str {
        "Check if sources meet triangulation requirements (CONS-006: 3+ independent sources with quality tiers)"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "urls": {
                    "type": "array",
                    "items": { "type": "string" },
                    "description": "Array of source URLs to validate for triangulation",
                    "minItems": 1
                }
            },
            "required": ["urls"]
        })
    }
}

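/// Verifies a claim against multiple sources with consensus analysis.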
struct VerifyClaimTool;

impl McpTool for VerifyClaimTool {
    fn name(&self) -> &str {
        "verify_claim"
    }

    fn description(&self) -> &str {
        "Verify a claim using triangulated sources (3+ independent sources) with consensus analysis"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The claim or query to verify"
                },
                "urls": {
                    "type": "array",
                    "items": { "type": "string" },
                    "description": "Array of source URLs to use for verification",
                    "minItems": 1
                },
                "contents": {
                    "type": "array",
                    "description": "Optional array of [url, content_snippet, supports_claim] tuples",
                    "items": {
                        "type": "array",
                        "items": [
                            { "type": "string", "description": "URL" },
                            { "type": ["string", "null"], "description": "Content snippet" },
                            { "type": ["boolean", "null"], "description": "Whether content supports the claim" }
                        ]
                    }
                },
                "preset": {
                    "type": "string",
                    "enum": ["default", "strict", "permissive"],
                    "description": "Configuration preset (default: standard 3+ sources, strict: 5+ sources Tier1 only, permissive: 2+ sources)",
                    "default": "default"
                }
            },
            "required": ["query", "urls"]
        })
    }
}

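/// Assesses the quality tier of a single source URL.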
struct CheckSourceQualityTool;

impl McpTool for CheckSourceQualityTool {
    fn name(&self) -> &str {
        "check_source_quality"
    }

    fn description(&self) -> &str {
        "Assess the quality tier (Tier1/2/3) and reliability of a source URL"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to assess for quality"
                }
            },
            "required": ["url"]
        })
    }
}

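/// Names of all tools exposed by this module.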
pub const AVAILABLE_TOOLS: &[&str] = &[
    "web_navigate",
    "web_screenshot",
    "web_pdf",
    "web_extract_content",
    "web_extract_links",
    "web_extract_metadata",
    "web_execute_js",
    "web_capture_mhtml",
    "triangulate_sources",
    "verify_claim",
    "check_source_quality",
];

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_tool_registry_new() {
        let registry = ToolRegistry::new();
        assert!(registry.tools.len() >= 8);
    }

    #[test]
    fn test_tool_definitions() {
        let registry = ToolRegistry::new();
        let defs = registry.definitions();
        assert!(!defs.is_empty());

        let nav = defs.iter().find(|d| d.name == "web_navigate");
        assert!(nav.is_some());
    }

    #[test]
    fn test_web_navigate_tool() {
        let tool = WebNavigateTool;
        assert_eq!(tool.name(), "web_navigate");
        assert!(tool.description().contains("Navigate"));

        let schema = tool.input_schema();
        assert!(schema["properties"]["url"].is_object());
    }

    #[test]
    fn test_available_tools() {
        assert!(AVAILABLE_TOOLS.contains(&"web_navigate"));
        assert!(AVAILABLE_TOOLS.contains(&"web_screenshot"));
        assert!(AVAILABLE_TOOLS.contains(&"web_execute_js"));
    }

    // --- SSRF protection tests ---

    #[test]
    fn test_ssrf_allows_public_urls() {
        assert!(is_url_safe("https://example.com").unwrap());
        assert!(is_url_safe("https://google.com/search?q=test").unwrap());
        assert!(is_url_safe("http://github.com").unwrap());
    }

    #[test]
    fn test_ssrf_blocks_localhost() {
        assert!(!is_url_safe("http://localhost").unwrap());
        assert!(!is_url_safe("http://localhost:8080").unwrap());
        assert!(!is_url_safe("https://localhost/api").unwrap());
        assert!(!is_url_safe("http://127.0.0.1").unwrap());
        assert!(!is_url_safe("http://127.0.0.1:3000").unwrap());
        assert!(!is_url_safe("http://[::1]").unwrap());
        assert!(!is_url_safe("http://0.0.0.0").unwrap());
    }

    #[test]
    fn test_ssrf_blocks_private_ips() {
        assert!(!is_url_safe("http://10.0.0.1").unwrap());
        assert!(!is_url_safe("http://10.255.255.255").unwrap());
        assert!(!is_url_safe("http://172.16.0.1").unwrap());
        assert!(!is_url_safe("http://172.31.255.255").unwrap());
        assert!(!is_url_safe("http://192.168.0.1").unwrap());
        assert!(!is_url_safe("http://192.168.1.100").unwrap());
    }

    #[test]
    fn test_ssrf_blocks_cloud_metadata() {
        assert!(!is_url_safe("http://169.254.169.254").unwrap());
        assert!(!is_url_safe("http://169.254.169.254/latest/meta-data/").unwrap());
        assert!(!is_url_safe("http://metadata.google.internal").unwrap());
        assert!(!is_url_safe("http://metadata").unwrap());
    }

    #[test]
    fn test_ssrf_blocks_internal_domains() {
        assert!(!is_url_safe("http://server.internal").unwrap());
        assert!(!is_url_safe("http://app.local").unwrap());
        assert!(!is_url_safe("http://db.localhost").unwrap());
        assert!(!is_url_safe("http://router.lan").unwrap());
        assert!(!is_url_safe("http://mail.corp").unwrap());
        assert!(!is_url_safe("http://nas.home").unwrap());
    }

    #[test]
    fn test_ssrf_blocks_dangerous_schemes() {
        assert!(!is_url_safe("file:///etc/passwd").unwrap());
        assert!(!is_url_safe("ftp://example.com").unwrap());
        assert!(!is_url_safe("gopher://example.com").unwrap());
        assert!(!is_url_safe("javascript:alert(1)").unwrap_or(false));
    }

    #[test]
    fn test_ssrf_blocks_cgnat_range() {
        assert!(!is_url_safe("http://100.64.0.1").unwrap());
        assert!(!is_url_safe("http://100.100.100.100").unwrap());
        assert!(!is_url_safe("http://100.127.255.255").unwrap());
    }

    #[test]
    fn test_validate_url_ssrf_returns_none_for_safe_urls() {
        assert!(validate_url_ssrf("https://example.com").is_none());
        assert!(validate_url_ssrf("https://github.com/repo").is_none());
    }

    #[test]
    fn test_validate_url_ssrf_returns_error_for_unsafe_urls() {
        let result = validate_url_ssrf("http://localhost:8080");
        assert!(result.is_some());

        let result = validate_url_ssrf("http://169.254.169.254");
        assert!(result.is_some());

        let result = validate_url_ssrf("http://192.168.1.1");
        assert!(result.is_some());
    }

    // --- Research / triangulation tool tests ---

    #[test]
    fn test_triangulate_sources_tool() {
        let tool = TriangulateSourcesTool;
        assert_eq!(tool.name(), "triangulate_sources");
        assert!(tool.description().contains("CONS-006"));

        let schema = tool.input_schema();
        assert!(schema["properties"]["urls"].is_object());
        assert_eq!(schema["required"][0], "urls");
    }

    #[test]
    fn test_verify_claim_tool() {
        let tool = VerifyClaimTool;
        assert_eq!(tool.name(), "verify_claim");
        assert!(tool.description().contains("triangulated"));

        let schema = tool.input_schema();
        assert!(schema["properties"]["query"].is_object());
        assert!(schema["properties"]["urls"].is_object());
        assert!(schema["properties"]["preset"].is_object());
    }

    #[test]
    fn test_check_source_quality_tool() {
        let tool = CheckSourceQualityTool;
        assert_eq!(tool.name(), "check_source_quality");
        assert!(tool.description().contains("quality"));

        let schema = tool.input_schema();
        assert!(schema["properties"]["url"].is_object());
        assert_eq!(schema["required"][0], "url");
    }

    #[test]
    fn test_available_tools_includes_triangulation() {
        assert!(AVAILABLE_TOOLS.contains(&"triangulate_sources"));
        assert!(AVAILABLE_TOOLS.contains(&"verify_claim"));
        assert!(AVAILABLE_TOOLS.contains(&"check_source_quality"));
    }

    #[test]
    fn test_tool_registry_includes_triangulation_tools() {
        let registry = ToolRegistry::new();
        let defs = registry.definitions();

        assert!(defs.iter().any(|d| d.name == "triangulate_sources"));
        assert!(defs.iter().any(|d| d.name == "verify_claim"));
        assert!(defs.iter().any(|d| d.name == "check_source_quality"));

        // 8 web tools + 3 research tools = 11 total.
        assert!(registry.tools.len() >= 11);
    }
}