use crate::browser::{BrowserController, CaptureFormat, CaptureOptions, PageCapture};
use crate::error::Result;
use crate::extraction::{ContentExtractor, LinkExtractor, MetadataExtractor};
use crate::mcp::types::{McpToolDefinition, ToolCallResult, ToolContent};
use crate::research::{ResearchConfig, SourceTier, TierClassifier, TriangulationEngine};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::net::IpAddr;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{error, info, instrument, warn};
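/// SSRF guard: returns `Ok(true)` only for http(s) URLs whose host is not a
/// localhost alias, private/reserved IP, internal domain suffix, or cloud
/// metadata endpoint. Returns `Err` if the URL cannot be parsed at all.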
fn is_url_safe(url: &str) -> std::result::Result<bool, String> {
let parsed = url::Url::parse(url).map_err(|e| format!("Invalid URL: {}", e))?;
match parsed.scheme() {
"http" | "https" => {}
scheme => {
warn!(scheme = %scheme, "SSRF: Blocked scheme");
return Ok(false);
}
}
let host = match parsed.host_str() {
Some(h) => h,
None => return Ok(false),
};
let localhost_variants = ["localhost", "127.0.0.1", "::1", "[::1]", "0.0.0.0", "0"];
if localhost_variants
.iter()
.any(|&l| host.eq_ignore_ascii_case(l))
{
warn!(host = %host, "SSRF: Blocked localhost");
return Ok(false);
}
    // Url::host_str keeps the brackets on IPv6 literals ("[::1]"), which would
    // fail IpAddr parsing and silently skip the range check; strip them first.
    let bare_host = host.trim_start_matches('[').trim_end_matches(']');
    if let Ok(ip) = bare_host.parse::<IpAddr>() {
if !is_public_ip(&ip) {
warn!(ip = %ip, "SSRF: Blocked private/reserved IP");
return Ok(false);
}
}
let blocked_suffixes = [
".internal",
".local",
".localhost",
".lan",
".corp",
".home",
];
    let host_lower = host.to_lowercase();
    if blocked_suffixes.iter().any(|&s| host_lower.ends_with(s)) {
        warn!(host = %host, "SSRF: Blocked internal domain");
        return Ok(false);
    }
    let blocked_hosts = [
        "169.254.169.254",
        "metadata.google.internal",
        "metadata",
    ];
if blocked_hosts.iter().any(|&h| host.eq_ignore_ascii_case(h)) {
warn!(host = %host, "SSRF: Blocked cloud metadata endpoint");
return Ok(false);
}
Ok(true)
}
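/// Returns true only for globally routable addresses, rejecting private,
/// loopback, link-local, broadcast, documentation, CGNAT, and unique-local
/// ranges.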
fn is_public_ip(ip: &IpAddr) -> bool {
match ip {
IpAddr::V4(ipv4) => {
!ipv4.is_private()
&& !ipv4.is_loopback()
&& !ipv4.is_link_local()
&& !ipv4.is_broadcast()
&& !ipv4.is_documentation()
&& !ipv4.is_unspecified()
                // 100.64.0.0/10: CGNAT shared address space (RFC 6598)
                && !(ipv4.octets()[0] == 100 && (64..=127).contains(&ipv4.octets()[1]))
                // 192.0.0.0/24: IETF protocol assignments (RFC 6890)
                && !(ipv4.octets()[0] == 192 && ipv4.octets()[1] == 0 && ipv4.octets()[2] == 0)
}
IpAddr::V6(ipv6) => {
!ipv6.is_loopback()
&& !ipv6.is_unspecified()
                // fe80::/10: link-local unicast
                && (ipv6.segments()[0] & 0xffc0) != 0xfe80
                // fc00::/7: unique local addresses
                && (ipv6.segments()[0] & 0xfe00) != 0xfc00
}
}
}
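/// Convenience wrapper: `None` if the URL passes the SSRF check, otherwise
/// `Some` error result ready to return to the MCP client.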
fn validate_url_ssrf(url: &str) -> Option<ToolCallResult> {
match is_url_safe(url) {
        Ok(true) => None,
        Ok(false) => Some(ToolCallResult::error(format!(
            "SSRF protection: URL '{}' is not allowed (private IP, localhost, or blocked endpoint)",
            url
        ))),
Err(e) => Some(ToolCallResult::error(format!("Invalid URL: {}", e))),
}
}
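/// A single MCP tool: a name, a human-readable description, and a JSON Schema
/// for its input. `definition()` bundles the three for tool listing.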
pub trait McpTool: Send + Sync {
fn name(&self) -> &str;
fn description(&self) -> &str;
fn input_schema(&self) -> Value;
fn definition(&self) -> McpToolDefinition {
McpToolDefinition {
name: self.name().to_string(),
description: self.description().to_string(),
input_schema: self.input_schema(),
}
}
}
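/// Registry of all MCP tools, responsible for listing tool definitions and
/// dispatching `execute` calls to the matching handler.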
pub struct ToolRegistry {
tools: HashMap<String, Box<dyn McpTool>>,
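    // Currently unused: every call launches a fresh browser instead
    // (see get_or_create_browser).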
#[allow(dead_code)]
browser: Arc<RwLock<Option<BrowserController>>>,
}
impl ToolRegistry {
pub fn new() -> Self {
let mut registry = Self {
tools: HashMap::new(),
browser: Arc::new(RwLock::new(None)),
};
registry.register(Box::new(WebNavigateTool));
registry.register(Box::new(WebScreenshotTool));
registry.register(Box::new(WebPdfTool));
registry.register(Box::new(WebExtractContentTool));
registry.register(Box::new(WebExtractLinksTool));
registry.register(Box::new(WebExtractMetadataTool));
registry.register(Box::new(WebExecuteJsTool));
registry.register(Box::new(WebCaptureMhtmlTool));
registry.register(Box::new(TriangulateSourcesTool));
registry.register(Box::new(VerifyClaimTool));
registry.register(Box::new(CheckSourceQualityTool));
registry
}
pub fn register(&mut self, tool: Box<dyn McpTool>) {
self.tools.insert(tool.name().to_string(), tool);
}
pub fn definitions(&self) -> Vec<McpToolDefinition> {
self.tools.values().map(|t| t.definition()).collect()
}
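    /// Dispatches a tool call by name. Research tools run directly; browser
    /// tools first obtain a `BrowserController`.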
#[instrument(skip(self, args))]
pub async fn execute(&self, name: &str, args: Value) -> ToolCallResult {
info!("Executing tool: {}", name);
if !self.tools.contains_key(name) {
return ToolCallResult::error(format!("Tool not found: {}", name));
}
        // Research tools need no browser; dispatch them before paying the
        // cost (and risk) of launching one.
        match name {
            "triangulate_sources" => return self.execute_triangulate_sources(args).await,
            "verify_claim" => return self.execute_verify_claim(args).await,
            "check_source_quality" => return self.execute_check_source_quality(args).await,
            _ => {}
        }
        let browser = match self.get_or_create_browser().await {
            Ok(b) => b,
            Err(e) => return ToolCallResult::error(format!("Failed to create browser: {}", e)),
        };
        match name {
            "web_navigate" => self.execute_navigate(&browser, args).await,
            "web_screenshot" => self.execute_screenshot(&browser, args).await,
            "web_pdf" => self.execute_pdf(&browser, args).await,
            "web_extract_content" => self.execute_extract_content(&browser, args).await,
            "web_extract_links" => self.execute_extract_links(&browser, args).await,
            "web_extract_metadata" => self.execute_extract_metadata(&browser, args).await,
            "web_execute_js" => self.execute_js(&browser, args).await,
            "web_capture_mhtml" => self.execute_capture_mhtml(&browser, args).await,
            _ => ToolCallResult::error(format!("Unknown tool: {}", name)),
        }
}
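    /// Launches a new `BrowserController` per call; the cached `browser`
    /// field is not yet consulted, so there is no instance reuse.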
async fn get_or_create_browser(&self) -> Result<BrowserController> {
BrowserController::new().await
}
async fn execute_navigate(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
match browser.navigate(url).await {
Ok(page) => {
let current_url = page.url().await;
ToolCallResult::text(format!("Successfully navigated to: {}", current_url))
}
Err(e) => {
error!("Navigation failed: {}", e);
ToolCallResult::error(format!("Navigation failed: {}", e))
}
}
}
async fn execute_screenshot(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
let full_page = args
.get("fullPage")
.and_then(|v| v.as_bool())
.unwrap_or(true);
let format_str = args.get("format").and_then(|v| v.as_str()).unwrap_or("png");
let format = match format_str {
"jpeg" | "jpg" => CaptureFormat::Jpeg,
"webp" => CaptureFormat::Webp,
_ => CaptureFormat::Png,
};
match browser.navigate(url).await {
Ok(page) => {
let options = CaptureOptions {
format,
full_page,
as_base64: true,
..Default::default()
};
match PageCapture::capture(&page, &options).await {
Ok(result) => {
let base64 = result.base64.clone().unwrap_or_else(|| result.to_base64());
ToolCallResult::image(base64, result.mime_type())
}
Err(e) => ToolCallResult::error(format!("Screenshot failed: {}", e)),
}
}
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
async fn execute_pdf(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
match browser.navigate(url).await {
Ok(page) => {
let options = CaptureOptions::pdf();
match PageCapture::capture(&page, &options).await {
Ok(result) => {
let base64 = result.to_base64();
ToolCallResult::multi(vec![
ToolContent::text(format!("PDF generated: {} bytes", result.size)),
ToolContent::Resource {
uri: format!("pdf://{}", url),
resource: crate::mcp::types::ResourceContent {
mime_type: "application/pdf".to_string(),
text: None,
blob: Some(base64),
},
},
])
}
Err(e) => ToolCallResult::error(format!("PDF generation failed: {}", e)),
}
}
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
async fn execute_extract_content(
&self,
browser: &BrowserController,
args: Value,
) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
let selector = args.get("selector").and_then(|v| v.as_str());
let format = args
.get("format")
.and_then(|v| v.as_str())
.unwrap_or("markdown");
match browser.navigate(url).await {
Ok(page) => {
let content = if let Some(sel) = selector {
ContentExtractor::extract_from_selector(&page, sel).await
} else {
ContentExtractor::extract_main_content(&page).await
};
match content {
Ok(c) => {
let output = match format {
"text" => c.text,
"html" => c.html,
_ => c.markdown.unwrap_or(c.text),
};
ToolCallResult::text(output)
}
Err(e) => ToolCallResult::error(format!("Content extraction failed: {}", e)),
}
}
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
async fn execute_extract_links(
&self,
browser: &BrowserController,
args: Value,
) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
let link_type = args.get("type").and_then(|v| v.as_str());
let selector = args.get("selector").and_then(|v| v.as_str());
match browser.navigate(url).await {
Ok(page) => {
let links = if let Some(sel) = selector {
LinkExtractor::extract_from_selector(&page, sel).await
} else {
match link_type {
Some("internal") => LinkExtractor::extract_internal(&page).await,
Some("external") => LinkExtractor::extract_external(&page).await,
_ => LinkExtractor::extract_all(&page).await,
}
};
match links {
Ok(links) => {
let json = serde_json::to_string_pretty(&links)
.unwrap_or_else(|_| "[]".to_string());
ToolCallResult::text(json)
}
Err(e) => ToolCallResult::error(format!("Link extraction failed: {}", e)),
}
}
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
async fn execute_extract_metadata(
&self,
browser: &BrowserController,
args: Value,
) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
match browser.navigate(url).await {
Ok(page) => match MetadataExtractor::extract(&page).await {
Ok(meta) => {
let json =
serde_json::to_string_pretty(&meta).unwrap_or_else(|_| "{}".to_string());
ToolCallResult::text(json)
}
Err(e) => ToolCallResult::error(format!("Metadata extraction failed: {}", e)),
},
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
async fn execute_js(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
let script = match args.get("script").and_then(|v| v.as_str()) {
Some(s) => s,
None => return ToolCallResult::error("Missing required parameter: script"),
};
match browser.navigate(url).await {
Ok(page) => match page.page.evaluate(script).await {
Ok(result) => {
let value: Value = result.into_value().unwrap_or(Value::Null);
let output =
serde_json::to_string_pretty(&value).unwrap_or_else(|_| "null".to_string());
ToolCallResult::text(output)
}
Err(e) => ToolCallResult::error(format!("JavaScript execution failed: {}", e)),
},
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
async fn execute_capture_mhtml(
&self,
browser: &BrowserController,
args: Value,
) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
match browser.navigate(url).await {
Ok(page) => match PageCapture::mhtml(&page).await {
Ok(result) => {
let base64 = result.to_base64();
ToolCallResult::multi(vec![
ToolContent::text(format!("MHTML captured: {} bytes", result.size)),
ToolContent::Resource {
uri: format!("mhtml://{}", url),
resource: crate::mcp::types::ResourceContent {
mime_type: "multipart/related".to_string(),
text: None,
blob: Some(base64),
},
},
])
}
Err(e) => ToolCallResult::error(format!("MHTML capture failed: {}", e)),
},
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
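    /// Classifies each source URL into a quality tier and reports whether the
    /// set satisfies the 3+ independent source triangulation requirement.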
#[instrument(skip(self, args))]
async fn execute_triangulate_sources(&self, args: Value) -> ToolCallResult {
let urls: Vec<String> = match args.get("urls") {
Some(Value::Array(arr)) => arr
.iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.collect(),
_ => {
return ToolCallResult::error("Missing required parameter: urls (array of strings)")
}
};
if urls.is_empty() {
return ToolCallResult::error("urls array cannot be empty");
}
for url in &urls {
if let Some(err) = validate_url_ssrf(url) {
return err;
}
}
let config = ResearchConfig::default();
let engine = TriangulationEngine::new(config);
let (meets_requirement, message) = engine.quick_verify(&urls);
let mut source_details: Vec<Value> = Vec::new();
for url in &urls {
let quality = engine.check_source(url);
source_details.push(json!({
"url": url,
"tier": format!("{:?}", quality.tier),
"tier_weight": quality.tier.weight(),
"domain": quality.domain,
"confidence": quality.confidence,
"reasons": quality.reasons,
}));
}
let result = json!({
"meets_triangulation": meets_requirement,
"message": message,
"min_sources_required": 3,
"sources_provided": urls.len(),
"source_details": source_details,
"recommendation": if meets_requirement {
"Sources meet triangulation requirements. Proceed with verification."
} else {
"Add more high-quality sources (Tier 1 or Tier 2) to meet triangulation requirements."
}
});
ToolCallResult::text(
serde_json::to_string_pretty(&result).unwrap_or_else(|_| "{}".to_string()),
)
}
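    /// Runs the full triangulation engine against a claim: per-source tier
    /// classification, support/refute tallies, and consensus analysis.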
#[instrument(skip(self, args))]
async fn execute_verify_claim(&self, args: Value) -> ToolCallResult {
let query = match args.get("query").and_then(|v| v.as_str()) {
Some(q) => q.to_string(),
None => return ToolCallResult::error("Missing required parameter: query"),
};
let urls: Vec<String> = match args.get("urls") {
Some(Value::Array(arr)) => arr
.iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.collect(),
_ => {
return ToolCallResult::error("Missing required parameter: urls (array of strings)")
}
};
let contents: Vec<(String, Option<String>, Option<bool>)> = match args.get("contents") {
Some(Value::Array(arr)) => arr
.iter()
.filter_map(|v| {
if let Value::Array(item) = v {
let url = item.first()?.as_str()?.to_string();
let content = item.get(1).and_then(|c| c.as_str()).map(|s| s.to_string());
let supports = item.get(2).and_then(|s| s.as_bool());
Some((url, content, supports))
} else {
None
}
})
.collect(),
            _ => Vec::new(),
        };
for url in &urls {
if let Some(err) = validate_url_ssrf(url) {
return err;
}
}
let preset = args
.get("preset")
.and_then(|v| v.as_str())
.unwrap_or("default");
let config = match preset {
"strict" => ResearchConfig::strict(),
"permissive" => ResearchConfig::permissive(),
_ => ResearchConfig::default(),
};
let engine = TriangulationEngine::new(config);
let result = engine.research_with_urls(&query, &urls, &contents);
let response = json!({
"verification_status": format!("{:?}", result.status),
"status_description": result.status.description(),
"is_verified": result.is_verified(),
"confidence": result.confidence,
"query": result.query,
"metrics": {
"total_sources": result.metrics.total_sources,
"accessible_sources": result.metrics.accessible_sources,
"supporting_sources": result.metrics.supporting_sources,
"refuting_sources": result.metrics.refuting_sources,
"neutral_sources": result.metrics.neutral_sources,
"tier1_count": result.metrics.tier1_count,
"tier2_count": result.metrics.tier2_count,
"tier3_count": result.metrics.tier3_count,
"average_confidence": result.metrics.average_confidence,
"meets_triangulation": result.metrics.meets_triangulation(),
},
"sources": result.sources.iter().map(|s| json!({
"url": s.url,
"title": s.title,
"tier": format!("{:?}", s.quality.tier),
"supports_claim": s.supports_claim,
"relevance_score": s.relevance_score,
"content_snippet": s.content_snippet,
"is_usable": s.is_usable(),
})).collect::<Vec<_>>(),
"consensus": {
"status": format!("{:?}", result.consensus.status),
"confidence": result.consensus.confidence,
"consensus_answer": result.consensus.consensus_answer,
"discrepancy_count": result.consensus.discrepancies.len(),
},
"timestamp": result.timestamp.to_rfc3339(),
});
ToolCallResult::text(
serde_json::to_string_pretty(&response).unwrap_or_else(|_| "{}".to_string()),
)
}
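    /// Classifies a single URL into a source tier and returns its weight,
    /// confidence, and a usage recommendation.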
#[instrument(skip(self, args))]
async fn execute_check_source_quality(&self, args: Value) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
let classifier = TierClassifier::default();
let quality = classifier.classify(url);
let result = json!({
"url": url,
"tier": format!("{:?}", quality.tier),
"tier_description": match quality.tier {
SourceTier::Tier1 => "Authoritative (official docs, .gov, .edu, peer-reviewed)",
SourceTier::Tier2 => "Reputable (Wikipedia, major news, Stack Overflow)",
SourceTier::Tier3 => "Low quality (forums, social media, unknown)",
SourceTier::Unknown => "Unknown (could not classify)",
},
"tier_weight": quality.tier.weight(),
"domain": quality.domain,
"base_confidence": quality.confidence,
"reasons": quality.reasons,
"is_authoritative": quality.tier == SourceTier::Tier1,
"is_reputable": matches!(quality.tier, SourceTier::Tier1 | SourceTier::Tier2),
"recommendation": match quality.tier {
SourceTier::Tier1 => "Excellent source. High priority for triangulation.",
SourceTier::Tier2 => "Good source. Acceptable for triangulation.",
SourceTier::Tier3 => "Use with caution. Seek additional Tier 1/2 sources.",
SourceTier::Unknown => "Unknown quality. Verify manually before using.",
}
});
ToolCallResult::text(
serde_json::to_string_pretty(&result).unwrap_or_else(|_| "{}".to_string()),
)
}
}
impl Default for ToolRegistry {
fn default() -> Self {
Self::new()
}
}
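// Unit structs below implement `McpTool` for each tool; the JSON Schemas they
// return are what MCP clients see via `tools/list`.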
struct WebNavigateTool;
impl McpTool for WebNavigateTool {
fn name(&self) -> &str {
"web_navigate"
}
fn description(&self) -> &str {
"Navigate to a URL using a headless browser"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to navigate to"
},
"waitFor": {
"type": "string",
"description": "CSS selector to wait for before returning",
"optional": true
}
},
"required": ["url"]
})
}
}
struct WebScreenshotTool;
impl McpTool for WebScreenshotTool {
fn name(&self) -> &str {
"web_screenshot"
}
fn description(&self) -> &str {
"Capture a screenshot of a web page"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to capture"
},
"fullPage": {
"type": "boolean",
"description": "Capture full page (default: true)",
"default": true
},
"format": {
"type": "string",
"enum": ["png", "jpeg", "webp"],
"description": "Image format (default: png)",
"default": "png"
},
"selector": {
"type": "string",
"description": "CSS selector to capture specific element"
}
},
"required": ["url"]
})
}
}
struct WebPdfTool;
impl McpTool for WebPdfTool {
fn name(&self) -> &str {
"web_pdf"
}
fn description(&self) -> &str {
"Generate a PDF of a web page"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to convert to PDF"
},
"printBackground": {
"type": "boolean",
"description": "Print background graphics (default: true)",
"default": true
}
},
"required": ["url"]
})
}
}
struct WebExtractContentTool;
impl McpTool for WebExtractContentTool {
fn name(&self) -> &str {
"web_extract_content"
}
fn description(&self) -> &str {
"Extract main content from a web page as text or markdown"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to extract content from"
},
"selector": {
"type": "string",
"description": "CSS selector to extract from (default: auto-detect main content)"
},
"format": {
"type": "string",
"enum": ["text", "markdown", "html"],
"description": "Output format (default: markdown)",
"default": "markdown"
}
},
"required": ["url"]
})
}
}
struct WebExtractLinksTool;
impl McpTool for WebExtractLinksTool {
fn name(&self) -> &str {
"web_extract_links"
}
fn description(&self) -> &str {
"Extract all links from a web page with context"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to extract links from"
},
"type": {
"type": "string",
"enum": ["all", "internal", "external"],
"description": "Type of links to extract (default: all)",
"default": "all"
},
"selector": {
"type": "string",
"description": "CSS selector to extract links from"
}
},
"required": ["url"]
})
}
}
struct WebExtractMetadataTool;
impl McpTool for WebExtractMetadataTool {
fn name(&self) -> &str {
"web_extract_metadata"
}
fn description(&self) -> &str {
"Extract page metadata (title, description, Open Graph, Twitter Card, etc.)"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to extract metadata from"
}
},
"required": ["url"]
})
}
}
struct WebExecuteJsTool;
impl McpTool for WebExecuteJsTool {
fn name(&self) -> &str {
"web_execute_js"
}
fn description(&self) -> &str {
"Execute JavaScript on a web page and return the result"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to execute JavaScript on"
},
"script": {
"type": "string",
"description": "The JavaScript code to execute"
}
},
"required": ["url", "script"]
})
}
}
struct WebCaptureMhtmlTool;
impl McpTool for WebCaptureMhtmlTool {
fn name(&self) -> &str {
"web_capture_mhtml"
}
fn description(&self) -> &str {
"Capture a complete web page as an MHTML archive"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to capture"
}
},
"required": ["url"]
})
}
}
struct TriangulateSourcesTool;
impl McpTool for TriangulateSourcesTool {
fn name(&self) -> &str {
"triangulate_sources"
}
fn description(&self) -> &str {
"Check if sources meet triangulation requirements (CONS-006: 3+ independent sources with quality tiers)"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"urls": {
"type": "array",
"items": { "type": "string" },
"description": "Array of source URLs to validate for triangulation",
"minItems": 1
}
},
"required": ["urls"]
})
}
}
struct VerifyClaimTool;
impl McpTool for VerifyClaimTool {
fn name(&self) -> &str {
"verify_claim"
}
fn description(&self) -> &str {
"Verify a claim using triangulated sources (3+ independent sources) with consensus analysis"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The claim or query to verify"
},
"urls": {
"type": "array",
"items": { "type": "string" },
"description": "Array of source URLs to use for verification",
"minItems": 1
},
"contents": {
"type": "array",
"description": "Optional array of [url, content_snippet, supports_claim] tuples",
"items": {
"type": "array",
"items": [
{ "type": "string", "description": "URL" },
{ "type": ["string", "null"], "description": "Content snippet" },
{ "type": ["boolean", "null"], "description": "Whether content supports the claim" }
]
}
},
"preset": {
"type": "string",
"enum": ["default", "strict", "permissive"],
"description": "Configuration preset (default: standard 3+ sources, strict: 5+ sources Tier1 only, permissive: 2+ sources)",
"default": "default"
}
},
"required": ["query", "urls"]
})
}
}
struct CheckSourceQualityTool;
impl McpTool for CheckSourceQualityTool {
fn name(&self) -> &str {
"check_source_quality"
}
fn description(&self) -> &str {
"Assess the quality tier (Tier1/2/3) and reliability of a source URL"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to assess for quality"
}
},
"required": ["url"]
})
}
}
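/// Names of every tool registered by `ToolRegistry::new`, in registration order.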
pub const AVAILABLE_TOOLS: &[&str] = &[
"web_navigate",
"web_screenshot",
"web_pdf",
"web_extract_content",
"web_extract_links",
"web_extract_metadata",
"web_execute_js",
"web_capture_mhtml",
"triangulate_sources",
"verify_claim",
"check_source_quality",
];
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_tool_registry_new() {
let registry = ToolRegistry::new();
        assert!(registry.tools.len() >= 11);
}
#[test]
fn test_tool_definitions() {
let registry = ToolRegistry::new();
let defs = registry.definitions();
assert!(!defs.is_empty());
let nav = defs.iter().find(|d| d.name == "web_navigate");
assert!(nav.is_some());
}
#[test]
fn test_web_navigate_tool() {
let tool = WebNavigateTool;
assert_eq!(tool.name(), "web_navigate");
assert!(tool.description().contains("Navigate"));
let schema = tool.input_schema();
assert!(schema["properties"]["url"].is_object());
}
#[test]
fn test_available_tools() {
assert!(AVAILABLE_TOOLS.contains(&"web_navigate"));
assert!(AVAILABLE_TOOLS.contains(&"web_screenshot"));
assert!(AVAILABLE_TOOLS.contains(&"web_execute_js"));
}
#[test]
fn test_ssrf_allows_public_urls() {
assert!(is_url_safe("https://example.com").unwrap());
assert!(is_url_safe("https://google.com/search?q=test").unwrap());
assert!(is_url_safe("http://github.com").unwrap());
}
#[test]
fn test_ssrf_blocks_localhost() {
assert!(!is_url_safe("http://localhost").unwrap());
assert!(!is_url_safe("http://localhost:8080").unwrap());
assert!(!is_url_safe("https://localhost/api").unwrap());
assert!(!is_url_safe("http://127.0.0.1").unwrap());
assert!(!is_url_safe("http://127.0.0.1:3000").unwrap());
assert!(!is_url_safe("http://[::1]").unwrap());
assert!(!is_url_safe("http://0.0.0.0").unwrap());
}
#[test]
fn test_ssrf_blocks_private_ips() {
assert!(!is_url_safe("http://10.0.0.1").unwrap());
assert!(!is_url_safe("http://10.255.255.255").unwrap());
assert!(!is_url_safe("http://172.16.0.1").unwrap());
assert!(!is_url_safe("http://172.31.255.255").unwrap());
assert!(!is_url_safe("http://192.168.0.1").unwrap());
assert!(!is_url_safe("http://192.168.1.100").unwrap());
}
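    // Sketch of a regression test for the bracket-stripping in `is_url_safe`:
    // bracketed IPv6 literals should reach the IP range check and be rejected
    // when link-local (fe80::/10) or unique-local (fc00::/7).
    #[test]
    fn test_ssrf_blocks_bracketed_ipv6() {
        assert!(!is_url_safe("http://[fe80::1]").unwrap());
        assert!(!is_url_safe("http://[fd00::1]").unwrap());
    }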
#[test]
fn test_ssrf_blocks_cloud_metadata() {
assert!(!is_url_safe("http://169.254.169.254").unwrap());
assert!(!is_url_safe("http://169.254.169.254/latest/meta-data/").unwrap());
assert!(!is_url_safe("http://metadata.google.internal").unwrap());
assert!(!is_url_safe("http://metadata").unwrap());
}
#[test]
fn test_ssrf_blocks_internal_domains() {
assert!(!is_url_safe("http://server.internal").unwrap());
assert!(!is_url_safe("http://app.local").unwrap());
assert!(!is_url_safe("http://db.localhost").unwrap());
assert!(!is_url_safe("http://router.lan").unwrap());
assert!(!is_url_safe("http://mail.corp").unwrap());
assert!(!is_url_safe("http://nas.home").unwrap());
}
#[test]
fn test_ssrf_blocks_dangerous_schemes() {
assert!(!is_url_safe("file:///etc/passwd").unwrap());
assert!(!is_url_safe("ftp://example.com").unwrap());
assert!(!is_url_safe("gopher://example.com").unwrap());
assert!(!is_url_safe("javascript:alert(1)").unwrap_or(false));
}
#[test]
fn test_ssrf_blocks_cgnat_range() {
assert!(!is_url_safe("http://100.64.0.1").unwrap());
assert!(!is_url_safe("http://100.100.100.100").unwrap());
assert!(!is_url_safe("http://100.127.255.255").unwrap());
}
#[test]
fn test_validate_url_ssrf_returns_none_for_safe_urls() {
assert!(validate_url_ssrf("https://example.com").is_none());
assert!(validate_url_ssrf("https://github.com/repo").is_none());
}
#[test]
fn test_validate_url_ssrf_returns_error_for_unsafe_urls() {
let result = validate_url_ssrf("http://localhost:8080");
assert!(result.is_some());
let result = validate_url_ssrf("http://169.254.169.254");
assert!(result.is_some());
let result = validate_url_ssrf("http://192.168.1.1");
assert!(result.is_some());
}
#[test]
fn test_triangulate_sources_tool() {
let tool = TriangulateSourcesTool;
assert_eq!(tool.name(), "triangulate_sources");
assert!(tool.description().contains("CONS-006"));
let schema = tool.input_schema();
assert!(schema["properties"]["urls"].is_object());
assert_eq!(schema["required"][0], "urls");
}
#[test]
fn test_verify_claim_tool() {
let tool = VerifyClaimTool;
assert_eq!(tool.name(), "verify_claim");
assert!(tool.description().contains("triangulated"));
let schema = tool.input_schema();
assert!(schema["properties"]["query"].is_object());
assert!(schema["properties"]["urls"].is_object());
assert!(schema["properties"]["preset"].is_object());
}
#[test]
fn test_check_source_quality_tool() {
let tool = CheckSourceQualityTool;
assert_eq!(tool.name(), "check_source_quality");
assert!(tool.description().contains("quality"));
let schema = tool.input_schema();
assert!(schema["properties"]["url"].is_object());
assert_eq!(schema["required"][0], "url");
}
#[test]
fn test_available_tools_includes_triangulation() {
assert!(AVAILABLE_TOOLS.contains(&"triangulate_sources"));
assert!(AVAILABLE_TOOLS.contains(&"verify_claim"));
assert!(AVAILABLE_TOOLS.contains(&"check_source_quality"));
}
#[test]
fn test_tool_registry_includes_triangulation_tools() {
let registry = ToolRegistry::new();
let defs = registry.definitions();
assert!(defs.iter().any(|d| d.name == "triangulate_sources"));
assert!(defs.iter().any(|d| d.name == "verify_claim"));
assert!(defs.iter().any(|d| d.name == "check_source_quality"));
assert!(registry.tools.len() >= 11);
}
}