reasonkit_web/mcp/tools.rs

//! MCP tool definitions and registry
//!
//! This module defines the available MCP tools and their implementations.
//!
//! # Triangulated Research Tools (CONS-006)
//!
//! This module includes tools for triangulated web research that enforce the
//! Three-Source Rule: no claim is accepted without verification from at least
//! three independent, high-quality sources.
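//!
//! # Example
//!
//! A minimal sketch of driving these tools through the registry (assuming the
//! crate is consumed as `reasonkit_web`; the URL is illustrative):
//!
//! ```ignore
//! use reasonkit_web::mcp::tools::ToolRegistry;
//! use serde_json::json;
//!
//! async fn demo() {
//!     let registry = ToolRegistry::new();
//!     // Pre-screen a source before spending a full verification run on it.
//!     let _report = registry
//!         .execute("check_source_quality", json!({ "url": "https://example.com" }))
//!         .await;
//! }
//! ```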

use crate::browser::{BrowserController, CaptureFormat, CaptureOptions, PageCapture};
use crate::error::Result;
use crate::extraction::{ContentExtractor, LinkExtractor, MetadataExtractor};
use crate::mcp::types::{McpToolDefinition, ToolCallResult, ToolContent};
use crate::research::{ResearchConfig, SourceTier, TierClassifier, TriangulationEngine};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::net::IpAddr;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{error, info, instrument, warn};

// ============================================================================
// Security: SSRF Protection
// ============================================================================

/// Check whether a URL is safe to access (SSRF protection).
///
/// Blocks private IP ranges, localhost, cloud metadata endpoints, and
/// dangerous schemes.
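///
/// A sketch of the expected behavior (inputs mirror the tests below):
///
/// ```ignore
/// assert!(is_url_safe("https://example.com").unwrap()); // public host: allowed
/// assert!(!is_url_safe("http://10.0.0.1").unwrap()); // RFC 1918 range: blocked
/// assert!(!is_url_safe("file:///etc/passwd").unwrap()); // non-http(s) scheme: blocked
/// ```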
fn is_url_safe(url: &str) -> std::result::Result<bool, String> {
    let parsed = url::Url::parse(url).map_err(|e| format!("Invalid URL: {}", e))?;

    // Only allow http/https schemes
    match parsed.scheme() {
        "http" | "https" => {}
        scheme => {
            warn!(scheme = %scheme, "SSRF: Blocked scheme");
            return Ok(false);
        }
    }

    // Get host
    let host = match parsed.host_str() {
        Some(h) => h,
        None => return Ok(false),
    };

    // Block localhost variants
    let localhost_variants = ["localhost", "127.0.0.1", "::1", "[::1]", "0.0.0.0", "0"];
    if localhost_variants
        .iter()
        .any(|&l| host.eq_ignore_ascii_case(l))
    {
        warn!(host = %host, "SSRF: Blocked localhost");
        return Ok(false);
    }

    // Try to parse as IP address and check if it's public
    if let Ok(ip) = host.parse::<IpAddr>() {
        if !is_public_ip(&ip) {
            warn!(ip = %ip, "SSRF: Blocked private/reserved IP");
            return Ok(false);
        }
    }

    // Block internal domains
    let blocked_suffixes = [
        ".internal",
        ".local",
        ".localhost",
        ".lan",
        ".corp",
        ".home",
    ];
    if blocked_suffixes
        .iter()
        .any(|&s| host.to_lowercase().ends_with(s))
    {
        warn!(host = %host, "SSRF: Blocked internal domain");
        return Ok(false);
    }

    // Block cloud metadata endpoints
    let blocked_hosts = [
        "169.254.169.254",          // AWS/GCP/Azure metadata
        "metadata.google.internal", // GCP
        "metadata",                 // Various cloud providers
    ];
    if blocked_hosts.iter().any(|&h| host.eq_ignore_ascii_case(h)) {
        warn!(host = %host, "SSRF: Blocked cloud metadata endpoint");
        return Ok(false);
    }

    Ok(true)
}

/// Check whether an IP address is public (not private or reserved).
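///
/// A sketch of the expected classifications (addresses are illustrative):
///
/// ```ignore
/// use std::net::IpAddr;
///
/// let public: IpAddr = "8.8.8.8".parse().unwrap(); // public DNS resolver
/// let cgnat: IpAddr = "100.64.0.1".parse().unwrap(); // inside 100.64.0.0/10
/// assert!(is_public_ip(&public));
/// assert!(!is_public_ip(&cgnat));
/// ```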
fn is_public_ip(ip: &IpAddr) -> bool {
    match ip {
        IpAddr::V4(ipv4) => {
            // RFC 1918 private ranges and other reserved ranges
            !ipv4.is_private()
                && !ipv4.is_loopback()
                && !ipv4.is_link_local()
                && !ipv4.is_broadcast()
                && !ipv4.is_documentation()
                && !ipv4.is_unspecified()
                // 100.64.0.0/10 (CGNAT)
                && !(ipv4.octets()[0] == 100 && (64..=127).contains(&ipv4.octets()[1]))
                // 192.0.0.0/24 (IETF Protocol Assignments)
                && !(ipv4.octets()[0] == 192 && ipv4.octets()[1] == 0 && ipv4.octets()[2] == 0)
        }
        IpAddr::V6(ipv6) => {
            !ipv6.is_loopback()
                && !ipv6.is_unspecified()
                // Check for link-local (fe80::/10)
                && (ipv6.segments()[0] & 0xffc0) != 0xfe80
                // Check for unique local (fc00::/7)
                && (ipv6.segments()[0] & 0xfe00) != 0xfc00
        }
    }
}

/// Validate a URL for SSRF protection, returning an error result if it is unsafe.
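///
/// Returns `None` when the URL is safe, which lets callers use it as a guard,
/// exactly as the `execute_*` methods below do:
///
/// ```ignore
/// if let Some(err) = validate_url_ssrf(url) {
///     return err; // propagate the ToolCallResult error to the client
/// }
/// ```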
fn validate_url_ssrf(url: &str) -> Option<ToolCallResult> {
    match is_url_safe(url) {
        Ok(true) => None, // URL is safe, continue
        Ok(false) => Some(ToolCallResult::error(format!(
            "SSRF protection: URL '{}' is not allowed (private IP, localhost, or blocked endpoint)",
            url
        ))),
        Err(e) => Some(ToolCallResult::error(format!("Invalid URL: {}", e))),
    }
}

/// A tool that can be registered with the [`ToolRegistry`].
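///
/// Only `name`, `description`, and `input_schema` are required; `definition`
/// has a default implementation. A minimal sketch of a custom implementor
/// (the `EchoTool` name and schema are illustrative):
///
/// ```ignore
/// struct EchoTool;
///
/// impl McpTool for EchoTool {
///     fn name(&self) -> &str {
///         "echo"
///     }
///
///     fn description(&self) -> &str {
///         "Echo the input back to the caller"
///     }
///
///     fn input_schema(&self) -> Value {
///         json!({
///             "type": "object",
///             "properties": { "text": { "type": "string" } },
///             "required": ["text"]
///         })
///     }
/// }
/// ```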
pub trait McpTool: Send + Sync {
    /// Tool name
    fn name(&self) -> &str;
    /// Tool description
    fn description(&self) -> &str;
    /// Input schema as JSON
    fn input_schema(&self) -> Value;
    /// Get the tool definition
    fn definition(&self) -> McpToolDefinition {
        McpToolDefinition {
            name: self.name().to_string(),
            description: self.description().to_string(),
            input_schema: self.input_schema(),
        }
    }
}

/// Tool registry holding all available tools
pub struct ToolRegistry {
    tools: HashMap<String, Box<dyn McpTool>>,
    #[allow(dead_code)]
    browser: Arc<RwLock<Option<BrowserController>>>,
}

impl ToolRegistry {
    /// Create a new tool registry with all built-in tools
    pub fn new() -> Self {
        let mut registry = Self {
            tools: HashMap::new(),
            browser: Arc::new(RwLock::new(None)),
        };

        // Register all built-in tools
        registry.register(Box::new(WebNavigateTool));
        registry.register(Box::new(WebScreenshotTool));
        registry.register(Box::new(WebPdfTool));
        registry.register(Box::new(WebExtractContentTool));
        registry.register(Box::new(WebExtractLinksTool));
        registry.register(Box::new(WebExtractMetadataTool));
        registry.register(Box::new(WebExecuteJsTool));
        registry.register(Box::new(WebCaptureMhtmlTool));

        // Register triangulated research tools (CONS-006)
        registry.register(Box::new(TriangulateSourcesTool));
        registry.register(Box::new(VerifyClaimTool));
        registry.register(Box::new(CheckSourceQualityTool));

        registry
    }

    /// Register a tool
    pub fn register(&mut self, tool: Box<dyn McpTool>) {
        self.tools.insert(tool.name().to_string(), tool);
    }

    /// Get all tool definitions
    pub fn definitions(&self) -> Vec<McpToolDefinition> {
        self.tools.values().map(|t| t.definition()).collect()
    }

    /// Execute a tool by name
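    ///
    /// A usage sketch (argument shapes as declared by each tool's schema):
    ///
    /// ```ignore
    /// let result = registry
    ///     .execute("web_navigate", json!({ "url": "https://example.com" }))
    ///     .await;
    /// ```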
    #[instrument(skip(self, args))]
    pub async fn execute(&self, name: &str, args: Value) -> ToolCallResult {
        info!("Executing tool: {}", name);

        if !self.tools.contains_key(name) {
            return ToolCallResult::error(format!("Tool not found: {}", name));
        }

        // Ensure a browser is available (created for every tool, including the
        // research tools that do not use it)
        let browser = self.get_or_create_browser().await;
        let browser = match browser {
            Ok(b) => b,
            Err(e) => return ToolCallResult::error(format!("Failed to create browser: {}", e)),
        };

        match name {
            "web_navigate" => self.execute_navigate(&browser, args).await,
            "web_screenshot" => self.execute_screenshot(&browser, args).await,
            "web_pdf" => self.execute_pdf(&browser, args).await,
            "web_extract_content" => self.execute_extract_content(&browser, args).await,
            "web_extract_links" => self.execute_extract_links(&browser, args).await,
            "web_extract_metadata" => self.execute_extract_metadata(&browser, args).await,
            "web_execute_js" => self.execute_js(&browser, args).await,
            "web_capture_mhtml" => self.execute_capture_mhtml(&browser, args).await,
            // Triangulated research tools (CONS-006)
            "triangulate_sources" => self.execute_triangulate_sources(args).await,
            "verify_claim" => self.execute_verify_claim(args).await,
            "check_source_quality" => self.execute_check_source_quality(args).await,
            _ => ToolCallResult::error(format!("Unknown tool: {}", name)),
        }
    }

    /// Get or create a browser instance
    async fn get_or_create_browser(&self) -> Result<BrowserController> {
        // For simplicity, create a new browser each time.
        // In production, you'd want to pool and reuse browsers.
        BrowserController::new().await
    }

    async fn execute_navigate(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        // SSRF protection: validate URL before navigation
        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        match browser.navigate(url).await {
            Ok(page) => {
                let current_url = page.url().await;
                ToolCallResult::text(format!("Successfully navigated to: {}", current_url))
            }
            Err(e) => {
                error!("Navigation failed: {}", e);
                ToolCallResult::error(format!("Navigation failed: {}", e))
            }
        }
    }

    async fn execute_screenshot(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        // SSRF protection: validate URL before screenshot
        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        let full_page = args
            .get("fullPage")
            .and_then(|v| v.as_bool())
            .unwrap_or(true);
        let format_str = args.get("format").and_then(|v| v.as_str()).unwrap_or("png");

        let format = match format_str {
            "jpeg" | "jpg" => CaptureFormat::Jpeg,
            "webp" => CaptureFormat::Webp,
            _ => CaptureFormat::Png,
        };

        match browser.navigate(url).await {
            Ok(page) => {
                let options = CaptureOptions {
                    format,
                    full_page,
                    as_base64: true,
                    ..Default::default()
                };

                match PageCapture::capture(&page, &options).await {
                    Ok(result) => {
                        let base64 = result.base64.clone().unwrap_or_else(|| result.to_base64());
                        ToolCallResult::image(base64, result.mime_type())
                    }
                    Err(e) => ToolCallResult::error(format!("Screenshot failed: {}", e)),
                }
            }
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    async fn execute_pdf(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        // SSRF protection: validate URL before PDF generation
        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        match browser.navigate(url).await {
            Ok(page) => {
                let options = CaptureOptions::pdf();

                match PageCapture::capture(&page, &options).await {
                    Ok(result) => {
                        let base64 = result.to_base64();
                        ToolCallResult::multi(vec![
                            ToolContent::text(format!("PDF generated: {} bytes", result.size)),
                            ToolContent::Resource {
                                uri: format!("pdf://{}", url),
                                resource: crate::mcp::types::ResourceContent {
                                    mime_type: "application/pdf".to_string(),
                                    text: None,
                                    blob: Some(base64),
                                },
                            },
                        ])
                    }
                    Err(e) => ToolCallResult::error(format!("PDF generation failed: {}", e)),
                }
            }
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    async fn execute_extract_content(
        &self,
        browser: &BrowserController,
        args: Value,
    ) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        // SSRF protection: validate URL before content extraction
        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        let selector = args.get("selector").and_then(|v| v.as_str());
        let format = args
            .get("format")
            .and_then(|v| v.as_str())
            .unwrap_or("markdown");

        match browser.navigate(url).await {
            Ok(page) => {
                let content = if let Some(sel) = selector {
                    ContentExtractor::extract_from_selector(&page, sel).await
                } else {
                    ContentExtractor::extract_main_content(&page).await
                };

                match content {
                    Ok(c) => {
                        let output = match format {
                            "text" => c.text,
                            "html" => c.html,
                            _ => c.markdown.unwrap_or(c.text),
                        };
                        ToolCallResult::text(output)
                    }
                    Err(e) => ToolCallResult::error(format!("Content extraction failed: {}", e)),
                }
            }
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    async fn execute_extract_links(
        &self,
        browser: &BrowserController,
        args: Value,
    ) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        // SSRF protection: validate URL before link extraction
        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        let link_type = args.get("type").and_then(|v| v.as_str());
        let selector = args.get("selector").and_then(|v| v.as_str());

        match browser.navigate(url).await {
            Ok(page) => {
                let links = if let Some(sel) = selector {
                    LinkExtractor::extract_from_selector(&page, sel).await
                } else {
                    match link_type {
                        Some("internal") => LinkExtractor::extract_internal(&page).await,
                        Some("external") => LinkExtractor::extract_external(&page).await,
                        _ => LinkExtractor::extract_all(&page).await,
                    }
                };

                match links {
                    Ok(links) => {
                        let json = serde_json::to_string_pretty(&links)
                            .unwrap_or_else(|_| "[]".to_string());
                        ToolCallResult::text(json)
                    }
                    Err(e) => ToolCallResult::error(format!("Link extraction failed: {}", e)),
                }
            }
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    async fn execute_extract_metadata(
        &self,
        browser: &BrowserController,
        args: Value,
    ) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        // SSRF protection: validate URL before metadata extraction
        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        match browser.navigate(url).await {
            Ok(page) => match MetadataExtractor::extract(&page).await {
                Ok(meta) => {
                    let json =
                        serde_json::to_string_pretty(&meta).unwrap_or_else(|_| "{}".to_string());
                    ToolCallResult::text(json)
                }
                Err(e) => ToolCallResult::error(format!("Metadata extraction failed: {}", e)),
            },
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    async fn execute_js(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        // SSRF protection: validate URL before JavaScript execution
        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        let script = match args.get("script").and_then(|v| v.as_str()) {
            Some(s) => s,
            None => return ToolCallResult::error("Missing required parameter: script"),
        };

        match browser.navigate(url).await {
            Ok(page) => match page.page.evaluate(script).await {
                Ok(result) => {
                    let value: Value = result.into_value().unwrap_or(Value::Null);
                    let output =
                        serde_json::to_string_pretty(&value).unwrap_or_else(|_| "null".to_string());
                    ToolCallResult::text(output)
                }
                Err(e) => ToolCallResult::error(format!("JavaScript execution failed: {}", e)),
            },
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    async fn execute_capture_mhtml(
        &self,
        browser: &BrowserController,
        args: Value,
    ) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        // SSRF protection: validate URL before MHTML capture
        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        match browser.navigate(url).await {
            Ok(page) => match PageCapture::mhtml(&page).await {
                Ok(result) => {
                    let base64 = result.to_base64();
                    ToolCallResult::multi(vec![
                        ToolContent::text(format!("MHTML captured: {} bytes", result.size)),
                        ToolContent::Resource {
                            uri: format!("mhtml://{}", url),
                            resource: crate::mcp::types::ResourceContent {
                                mime_type: "multipart/related".to_string(),
                                text: None,
                                blob: Some(base64),
                            },
                        },
                    ])
                }
                Err(e) => ToolCallResult::error(format!("MHTML capture failed: {}", e)),
            },
            Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
        }
    }

    // ========================================================================
    // Triangulated Research Tools (CONS-006)
    // ========================================================================

    /// Execute triangulate_sources: Check if sources meet triangulation requirements
    #[instrument(skip(self, args))]
    async fn execute_triangulate_sources(&self, args: Value) -> ToolCallResult {
        let urls: Vec<String> = match args.get("urls") {
            Some(Value::Array(arr)) => arr
                .iter()
                .filter_map(|v| v.as_str().map(|s| s.to_string()))
                .collect(),
            _ => {
                return ToolCallResult::error("Missing required parameter: urls (array of strings)")
            }
        };

        if urls.is_empty() {
            return ToolCallResult::error("urls array cannot be empty");
        }

        // Validate all URLs for SSRF
        for url in &urls {
            if let Some(err) = validate_url_ssrf(url) {
                return err;
            }
        }

        let config = ResearchConfig::default();
        let engine = TriangulationEngine::new(config);

        // Quick verification without fetching content
        let (meets_requirement, message) = engine.quick_verify(&urls);

        // Get detailed quality info for each source
        let mut source_details: Vec<Value> = Vec::new();
        for url in &urls {
            let quality = engine.check_source(url);
            source_details.push(json!({
                "url": url,
                "tier": format!("{:?}", quality.tier),
                "tier_weight": quality.tier.weight(),
                "domain": quality.domain,
                "confidence": quality.confidence,
                "reasons": quality.reasons,
            }));
        }

        let result = json!({
            "meets_triangulation": meets_requirement,
            "message": message,
            "min_sources_required": 3,
            "sources_provided": urls.len(),
            "source_details": source_details,
            "recommendation": if meets_requirement {
                "Sources meet triangulation requirements. Proceed with verification."
            } else {
                "Add more high-quality sources (Tier 1 or Tier 2) to meet triangulation requirements."
            }
        });

        ToolCallResult::text(
            serde_json::to_string_pretty(&result).unwrap_or_else(|_| "{}".to_string()),
        )
    }

    /// Execute verify_claim: Full verification with claim analysis
    #[instrument(skip(self, args))]
    async fn execute_verify_claim(&self, args: Value) -> ToolCallResult {
        let query = match args.get("query").and_then(|v| v.as_str()) {
            Some(q) => q.to_string(),
            None => return ToolCallResult::error("Missing required parameter: query"),
        };

        let urls: Vec<String> = match args.get("urls") {
            Some(Value::Array(arr)) => arr
                .iter()
                .filter_map(|v| v.as_str().map(|s| s.to_string()))
                .collect(),
            _ => {
                return ToolCallResult::error("Missing required parameter: urls (array of strings)")
            }
        };

        // Parse contents array: each element is [url, content_snippet, supports_claim]
        let contents: Vec<(String, Option<String>, Option<bool>)> = match args.get("contents") {
            Some(Value::Array(arr)) => arr
                .iter()
                .filter_map(|v| {
                    if let Value::Array(item) = v {
                        let url = item.first()?.as_str()?.to_string();
                        let content = item.get(1).and_then(|c| c.as_str()).map(|s| s.to_string());
                        let supports = item.get(2).and_then(|s| s.as_bool());
                        Some((url, content, supports))
                    } else {
                        None
                    }
                })
                .collect(),
            _ => Vec::new(), // contents is optional
        };

        // Validate all URLs for SSRF
        for url in &urls {
            if let Some(err) = validate_url_ssrf(url) {
                return err;
            }
        }

        // Get config preset
        let preset = args
            .get("preset")
            .and_then(|v| v.as_str())
            .unwrap_or("default");

        let config = match preset {
            "strict" => ResearchConfig::strict(),
            "permissive" => ResearchConfig::permissive(),
            _ => ResearchConfig::default(),
        };

        let engine = TriangulationEngine::new(config);
        let result = engine.research_with_urls(&query, &urls, &contents);

        // Build response
        let response = json!({
            "verification_status": format!("{:?}", result.status),
            "status_description": result.status.description(),
            "is_verified": result.is_verified(),
            "confidence": result.confidence,
            "query": result.query,
            "metrics": {
                "total_sources": result.metrics.total_sources,
                "accessible_sources": result.metrics.accessible_sources,
                "supporting_sources": result.metrics.supporting_sources,
                "refuting_sources": result.metrics.refuting_sources,
                "neutral_sources": result.metrics.neutral_sources,
                "tier1_count": result.metrics.tier1_count,
                "tier2_count": result.metrics.tier2_count,
                "tier3_count": result.metrics.tier3_count,
                "average_confidence": result.metrics.average_confidence,
                "meets_triangulation": result.metrics.meets_triangulation(),
            },
            "sources": result.sources.iter().map(|s| json!({
                "url": s.url,
                "title": s.title,
                "tier": format!("{:?}", s.quality.tier),
                "supports_claim": s.supports_claim,
                "relevance_score": s.relevance_score,
                "content_snippet": s.content_snippet,
                "is_usable": s.is_usable(),
            })).collect::<Vec<_>>(),
            "consensus": {
                "status": format!("{:?}", result.consensus.status),
                "confidence": result.consensus.confidence,
                "consensus_answer": result.consensus.consensus_answer,
                "discrepancy_count": result.consensus.discrepancies.len(),
            },
            "timestamp": result.timestamp.to_rfc3339(),
        });

        ToolCallResult::text(
            serde_json::to_string_pretty(&response).unwrap_or_else(|_| "{}".to_string()),
        )
    }

    /// Execute check_source_quality: Get quality assessment for a URL
    #[instrument(skip(self, args))]
    async fn execute_check_source_quality(&self, args: Value) -> ToolCallResult {
        let url = match args.get("url").and_then(|v| v.as_str()) {
            Some(u) => u,
            None => return ToolCallResult::error("Missing required parameter: url"),
        };

        // SSRF protection
        if let Some(err) = validate_url_ssrf(url) {
            return err;
        }

        let classifier = TierClassifier::default();
        let quality = classifier.classify(url);

        let result = json!({
            "url": url,
            "tier": format!("{:?}", quality.tier),
            "tier_description": match quality.tier {
                SourceTier::Tier1 => "Authoritative (official docs, .gov, .edu, peer-reviewed)",
                SourceTier::Tier2 => "Reputable (Wikipedia, major news, Stack Overflow)",
                SourceTier::Tier3 => "Low quality (forums, social media, unknown)",
                SourceTier::Unknown => "Unknown (could not classify)",
            },
            "tier_weight": quality.tier.weight(),
            "domain": quality.domain,
            "base_confidence": quality.confidence,
            "reasons": quality.reasons,
            "is_authoritative": quality.tier == SourceTier::Tier1,
            "is_reputable": matches!(quality.tier, SourceTier::Tier1 | SourceTier::Tier2),
            "recommendation": match quality.tier {
                SourceTier::Tier1 => "Excellent source. High priority for triangulation.",
                SourceTier::Tier2 => "Good source. Acceptable for triangulation.",
                SourceTier::Tier3 => "Use with caution. Seek additional Tier 1/2 sources.",
                SourceTier::Unknown => "Unknown quality. Verify manually before using.",
            }
        });

        ToolCallResult::text(
            serde_json::to_string_pretty(&result).unwrap_or_else(|_| "{}".to_string()),
        )
    }
}

impl Default for ToolRegistry {
    fn default() -> Self {
        Self::new()
    }
}

// ============================================================================
// Tool Definitions
// ============================================================================

/// Navigate to a URL
struct WebNavigateTool;

impl McpTool for WebNavigateTool {
    fn name(&self) -> &str {
        "web_navigate"
    }

    fn description(&self) -> &str {
        "Navigate to a URL using a headless browser"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to navigate to"
                },
                "waitFor": {
                    "type": "string",
                    "description": "Optional CSS selector to wait for before returning"
                }
            },
            "required": ["url"]
        })
    }
}

/// Capture a screenshot
struct WebScreenshotTool;

impl McpTool for WebScreenshotTool {
    fn name(&self) -> &str {
        "web_screenshot"
    }

    fn description(&self) -> &str {
        "Capture a screenshot of a web page"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to capture"
                },
                "fullPage": {
                    "type": "boolean",
                    "description": "Capture full page (default: true)",
                    "default": true
                },
                "format": {
                    "type": "string",
                    "enum": ["png", "jpeg", "webp"],
                    "description": "Image format (default: png)",
                    "default": "png"
                },
                "selector": {
                    "type": "string",
                    "description": "CSS selector to capture a specific element"
                }
            },
            "required": ["url"]
        })
    }
}

/// Generate a PDF
struct WebPdfTool;

impl McpTool for WebPdfTool {
    fn name(&self) -> &str {
        "web_pdf"
    }

    fn description(&self) -> &str {
        "Generate a PDF of a web page"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to convert to PDF"
                },
                "printBackground": {
                    "type": "boolean",
                    "description": "Print background graphics (default: true)",
                    "default": true
                }
            },
            "required": ["url"]
        })
    }
}

/// Extract content
struct WebExtractContentTool;

impl McpTool for WebExtractContentTool {
    fn name(&self) -> &str {
        "web_extract_content"
    }

    fn description(&self) -> &str {
        "Extract main content from a web page as text or markdown"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to extract content from"
                },
                "selector": {
                    "type": "string",
                    "description": "CSS selector to extract from (default: auto-detect main content)"
                },
                "format": {
                    "type": "string",
                    "enum": ["text", "markdown", "html"],
                    "description": "Output format (default: markdown)",
                    "default": "markdown"
                }
            },
            "required": ["url"]
        })
    }
}

/// Extract links
struct WebExtractLinksTool;

impl McpTool for WebExtractLinksTool {
    fn name(&self) -> &str {
        "web_extract_links"
    }

    fn description(&self) -> &str {
        "Extract all links from a web page with context"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to extract links from"
                },
                "type": {
                    "type": "string",
                    "enum": ["all", "internal", "external"],
                    "description": "Type of links to extract (default: all)",
                    "default": "all"
                },
                "selector": {
                    "type": "string",
                    "description": "CSS selector to extract links from"
                }
            },
            "required": ["url"]
        })
    }
}

/// Extract metadata
struct WebExtractMetadataTool;

impl McpTool for WebExtractMetadataTool {
    fn name(&self) -> &str {
        "web_extract_metadata"
    }

    fn description(&self) -> &str {
        "Extract page metadata (title, description, Open Graph, Twitter Card, etc.)"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to extract metadata from"
                }
            },
            "required": ["url"]
        })
    }
}

/// Execute JavaScript
struct WebExecuteJsTool;

impl McpTool for WebExecuteJsTool {
    fn name(&self) -> &str {
        "web_execute_js"
    }

    fn description(&self) -> &str {
        "Execute JavaScript on a web page and return the result"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to execute JavaScript on"
                },
                "script": {
                    "type": "string",
                    "description": "The JavaScript code to execute"
                }
            },
            "required": ["url", "script"]
        })
    }
}

/// Capture MHTML
struct WebCaptureMhtmlTool;

impl McpTool for WebCaptureMhtmlTool {
    fn name(&self) -> &str {
        "web_capture_mhtml"
    }

    fn description(&self) -> &str {
        "Capture a complete web page as an MHTML archive"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to capture"
                }
            },
            "required": ["url"]
        })
    }
}

// ============================================================================
// Triangulated Research Tools (CONS-006)
// ============================================================================

/// Triangulate Sources - Check if URLs meet triangulation requirements
///
/// This tool verifies that a set of source URLs meets the Three-Source Rule
/// (CONS-006) before performing full verification. Use this to pre-validate
/// your sources before running expensive verification operations.
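///
/// A sketch of the expected arguments (the URLs are illustrative):
///
/// ```ignore
/// json!({
///     "urls": [
///         "https://doc.rust-lang.org/book/",
///         "https://en.wikipedia.org/wiki/Rust_(programming_language)",
///         "https://stackoverflow.com/questions/tagged/rust"
///     ]
/// })
/// ```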
struct TriangulateSourcesTool;

impl McpTool for TriangulateSourcesTool {
    fn name(&self) -> &str {
        "triangulate_sources"
    }

    fn description(&self) -> &str {
        "Check if sources meet triangulation requirements (CONS-006: 3+ independent sources with quality tiers)"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "urls": {
                    "type": "array",
                    "items": { "type": "string" },
                    "description": "Array of source URLs to validate for triangulation",
                    "minItems": 1
                }
            },
            "required": ["urls"]
        })
    }
}

/// Verify Claim - Full triangulated verification with consensus analysis
///
/// This tool performs comprehensive claim verification using multiple sources,
/// analyzing consensus, detecting conflicts, and providing confidence metrics.
/// Requires at least 3 sources for full verification (CONS-006 compliance).
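///
/// A sketch of the argument shape, including the optional `contents` entries
/// of `[url, content_snippet, supports_claim]` (values are illustrative):
///
/// ```ignore
/// json!({
///     "query": "Rust 1.0 was released in May 2015",
///     "urls": [
///         "https://blog.rust-lang.org/2015/05/15/Rust-1.0.html",
///         "https://en.wikipedia.org/wiki/Rust_(programming_language)",
///         "https://doc.rust-lang.org/book/"
///     ],
///     "contents": [
///         ["https://blog.rust-lang.org/2015/05/15/Rust-1.0.html", "Announcing Rust 1.0", true]
///     ],
///     "preset": "default"
/// })
/// ```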
struct VerifyClaimTool;

impl McpTool for VerifyClaimTool {
    fn name(&self) -> &str {
        "verify_claim"
    }

    fn description(&self) -> &str {
        "Verify a claim using triangulated sources (3+ independent sources) with consensus analysis"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The claim or query to verify"
                },
                "urls": {
                    "type": "array",
                    "items": { "type": "string" },
                    "description": "Array of source URLs to use for verification",
                    "minItems": 1
                },
                "contents": {
                    "type": "array",
                    "description": "Optional array of [url, content_snippet, supports_claim] tuples",
                    "items": {
                        "type": "array",
                        "items": [
                            { "type": "string", "description": "URL" },
                            { "type": ["string", "null"], "description": "Content snippet" },
                            { "type": ["boolean", "null"], "description": "Whether content supports the claim" }
                        ]
                    }
                },
                "preset": {
                    "type": "string",
                    "enum": ["default", "strict", "permissive"],
                    "description": "Configuration preset (default: standard 3+ sources, strict: 5+ sources Tier1 only, permissive: 2+ sources)",
                    "default": "default"
                }
            },
            "required": ["query", "urls"]
        })
    }
}

/// Check Source Quality - Get quality assessment for a URL
///
/// This tool assesses the quality tier and reliability of a source URL
/// without fetching its content. Use it to evaluate sources before using them
/// in triangulated verification.
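///
/// The tool returns a pretty-printed JSON report; an abridged sketch of its
/// shape (field names match `execute_check_source_quality`, values are
/// illustrative):
///
/// ```ignore
/// {
///     "url": "https://doc.rust-lang.org/book/",
///     "tier": "Tier1",
///     "domain": "doc.rust-lang.org",
///     "is_authoritative": true,
///     "recommendation": "Excellent source. High priority for triangulation."
/// }
/// ```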
struct CheckSourceQualityTool;

impl McpTool for CheckSourceQualityTool {
    fn name(&self) -> &str {
        "check_source_quality"
    }

    fn description(&self) -> &str {
        "Assess the quality tier (Tier1/2/3) and reliability of a source URL"
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to assess for quality"
                }
            },
            "required": ["url"]
        })
    }
}

/// List of all available tools (for documentation)
pub const AVAILABLE_TOOLS: &[&str] = &[
    // Browser automation tools
    "web_navigate",
    "web_screenshot",
    "web_pdf",
    "web_extract_content",
    "web_extract_links",
    "web_extract_metadata",
    "web_execute_js",
    "web_capture_mhtml",
    // Triangulated research tools (CONS-006)
    "triangulate_sources",
    "verify_claim",
    "check_source_quality",
];

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_tool_registry_new() {
        let registry = ToolRegistry::new();
        assert!(registry.tools.len() >= 8);
    }

    #[test]
    fn test_tool_definitions() {
        let registry = ToolRegistry::new();
        let defs = registry.definitions();
        assert!(!defs.is_empty());

        // Check that web_navigate exists
        let nav = defs.iter().find(|d| d.name == "web_navigate");
        assert!(nav.is_some());
    }

    #[test]
    fn test_web_navigate_tool() {
        let tool = WebNavigateTool;
        assert_eq!(tool.name(), "web_navigate");
        assert!(tool.description().contains("Navigate"));

        let schema = tool.input_schema();
        assert!(schema["properties"]["url"].is_object());
    }

    #[test]
    fn test_available_tools() {
        assert!(AVAILABLE_TOOLS.contains(&"web_navigate"));
        assert!(AVAILABLE_TOOLS.contains(&"web_screenshot"));
        assert!(AVAILABLE_TOOLS.contains(&"web_execute_js"));
    }

    // ============================================================================
    // SSRF Protection Tests
    // ============================================================================

    #[test]
    fn test_ssrf_allows_public_urls() {
        assert!(is_url_safe("https://example.com").unwrap());
        assert!(is_url_safe("https://google.com/search?q=test").unwrap());
        assert!(is_url_safe("http://github.com").unwrap());
    }

    #[test]
    fn test_ssrf_blocks_localhost() {
        assert!(!is_url_safe("http://localhost").unwrap());
        assert!(!is_url_safe("http://localhost:8080").unwrap());
        assert!(!is_url_safe("https://localhost/api").unwrap());
        assert!(!is_url_safe("http://127.0.0.1").unwrap());
        assert!(!is_url_safe("http://127.0.0.1:3000").unwrap());
        assert!(!is_url_safe("http://[::1]").unwrap());
        assert!(!is_url_safe("http://0.0.0.0").unwrap());
    }

    #[test]
    fn test_ssrf_blocks_private_ips() {
        // RFC 1918 private ranges
        assert!(!is_url_safe("http://10.0.0.1").unwrap());
        assert!(!is_url_safe("http://10.255.255.255").unwrap());
        assert!(!is_url_safe("http://172.16.0.1").unwrap());
        assert!(!is_url_safe("http://172.31.255.255").unwrap());
        assert!(!is_url_safe("http://192.168.0.1").unwrap());
        assert!(!is_url_safe("http://192.168.1.100").unwrap());
    }

    #[test]
    fn test_ssrf_blocks_cloud_metadata() {
        assert!(!is_url_safe("http://169.254.169.254").unwrap());
        assert!(!is_url_safe("http://169.254.169.254/latest/meta-data/").unwrap());
        assert!(!is_url_safe("http://metadata.google.internal").unwrap());
        assert!(!is_url_safe("http://metadata").unwrap());
    }

    #[test]
    fn test_ssrf_blocks_internal_domains() {
        assert!(!is_url_safe("http://server.internal").unwrap());
        assert!(!is_url_safe("http://app.local").unwrap());
        assert!(!is_url_safe("http://db.localhost").unwrap());
        assert!(!is_url_safe("http://router.lan").unwrap());
        assert!(!is_url_safe("http://mail.corp").unwrap());
        assert!(!is_url_safe("http://nas.home").unwrap());
    }

    #[test]
    fn test_ssrf_blocks_dangerous_schemes() {
        assert!(!is_url_safe("file:///etc/passwd").unwrap());
        assert!(!is_url_safe("ftp://example.com").unwrap());
        assert!(!is_url_safe("gopher://example.com").unwrap());
        assert!(!is_url_safe("javascript:alert(1)").unwrap_or(false));
    }

    #[test]
    fn test_ssrf_blocks_cgnat_range() {
        // 100.64.0.0/10 (CGNAT)
        assert!(!is_url_safe("http://100.64.0.1").unwrap());
        assert!(!is_url_safe("http://100.100.100.100").unwrap());
        assert!(!is_url_safe("http://100.127.255.255").unwrap());
    }

    #[test]
    fn test_validate_url_ssrf_returns_none_for_safe_urls() {
        assert!(validate_url_ssrf("https://example.com").is_none());
        assert!(validate_url_ssrf("https://github.com/repo").is_none());
    }

    #[test]
    fn test_validate_url_ssrf_returns_error_for_unsafe_urls() {
        let result = validate_url_ssrf("http://localhost:8080");
        assert!(result.is_some());

        let result = validate_url_ssrf("http://169.254.169.254");
        assert!(result.is_some());

        let result = validate_url_ssrf("http://192.168.1.1");
        assert!(result.is_some());
    }

    // ============================================================================
    // Triangulated Research Tools Tests (CONS-006)
    // ============================================================================

    #[test]
    fn test_triangulate_sources_tool() {
        let tool = TriangulateSourcesTool;
        assert_eq!(tool.name(), "triangulate_sources");
        assert!(tool.description().contains("CONS-006"));

        let schema = tool.input_schema();
        assert!(schema["properties"]["urls"].is_object());
        assert_eq!(schema["required"][0], "urls");
    }

    #[test]
    fn test_verify_claim_tool() {
        let tool = VerifyClaimTool;
        assert_eq!(tool.name(), "verify_claim");
        assert!(tool.description().contains("triangulated"));

        let schema = tool.input_schema();
        assert!(schema["properties"]["query"].is_object());
        assert!(schema["properties"]["urls"].is_object());
        assert!(schema["properties"]["preset"].is_object());
    }

    #[test]
    fn test_check_source_quality_tool() {
        let tool = CheckSourceQualityTool;
        assert_eq!(tool.name(), "check_source_quality");
        assert!(tool.description().contains("quality"));

        let schema = tool.input_schema();
        assert!(schema["properties"]["url"].is_object());
        assert_eq!(schema["required"][0], "url");
    }

    #[test]
    fn test_available_tools_includes_triangulation() {
        assert!(AVAILABLE_TOOLS.contains(&"triangulate_sources"));
        assert!(AVAILABLE_TOOLS.contains(&"verify_claim"));
        assert!(AVAILABLE_TOOLS.contains(&"check_source_quality"));
    }

    #[test]
    fn test_tool_registry_includes_triangulation_tools() {
        let registry = ToolRegistry::new();
        let defs = registry.definitions();

        // Check triangulation tools exist
        assert!(defs.iter().any(|d| d.name == "triangulate_sources"));
        assert!(defs.iter().any(|d| d.name == "verify_claim"));
        assert!(defs.iter().any(|d| d.name == "check_source_quality"));

        // Registry should have at least 11 tools now (8 browser + 3 triangulation)
        assert!(registry.tools.len() >= 11);
    }
}