use crate::browser::{BrowserController, CaptureFormat, CaptureOptions, PageCapture};
use crate::error::Result;
use crate::extraction::{ContentExtractor, LinkExtractor, MetadataExtractor};
use crate::mcp::types::{McpToolDefinition, ToolCallResult, ToolContent};
use crate::research::{ResearchConfig, SourceTier, TierClassifier, TriangulationEngine};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::net::IpAddr;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{error, info, instrument, warn};
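/// SSRF guard: returns `Ok(true)` only for http(s) URLs whose host is not a
/// localhost alias, private/reserved IP, internal domain suffix, or cloud
/// metadata endpoint. Returns `Err` if the URL cannot be parsed at all.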
fn is_url_safe(url: &str) -> std::result::Result<bool, String> {
let parsed = url::Url::parse(url).map_err(|e| format!("Invalid URL: {}", e))?;
match parsed.scheme() {
"http" | "https" => {}
scheme => {
warn!(scheme = %scheme, "SSRF: Blocked scheme");
return Ok(false);
}
}
let host = match parsed.host_str() {
Some(h) => h,
None => return Ok(false),
};
let localhost_variants = ["localhost", "127.0.0.1", "::1", "[::1]", "0.0.0.0", "0"];
if localhost_variants
.iter()
.any(|&l| host.eq_ignore_ascii_case(l))
{
warn!(host = %host, "SSRF: Blocked localhost");
return Ok(false);
}
    // Url::host_str keeps the brackets on IPv6 literals ("[::1]"), which would
    // fail IpAddr parsing and silently skip the range check; strip them first.
    let bare_host = host.trim_start_matches('[').trim_end_matches(']');
    if let Ok(ip) = bare_host.parse::<IpAddr>() {
if !is_public_ip(&ip) {
warn!(ip = %ip, "SSRF: Blocked private/reserved IP");
return Ok(false);
}
}
let blocked_suffixes = [
".internal",
".local",
".localhost",
".lan",
".corp",
".home",
];
    let host_lower = host.to_lowercase();
    if blocked_suffixes.iter().any(|&s| host_lower.ends_with(s)) {
        warn!(host = %host, "SSRF: Blocked internal domain");
        return Ok(false);
    }
    let blocked_hosts = [
        "169.254.169.254",
        "metadata.google.internal",
        "metadata",
    ];
if blocked_hosts.iter().any(|&h| host.eq_ignore_ascii_case(h)) {
warn!(host = %host, "SSRF: Blocked cloud metadata endpoint");
return Ok(false);
}
Ok(true)
}
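/// Returns true only for globally routable addresses, rejecting private,
/// loopback, link-local, broadcast, documentation, CGNAT, and unique-local
/// ranges.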
fn is_public_ip(ip: &IpAddr) -> bool {
match ip {
IpAddr::V4(ipv4) => {
!ipv4.is_private()
&& !ipv4.is_loopback()
&& !ipv4.is_link_local()
&& !ipv4.is_broadcast()
&& !ipv4.is_documentation()
&& !ipv4.is_unspecified()
                // 100.64.0.0/10: CGNAT shared address space (RFC 6598)
                && !(ipv4.octets()[0] == 100 && (64..=127).contains(&ipv4.octets()[1]))
                // 192.0.0.0/24: IETF protocol assignments (RFC 6890)
                && !(ipv4.octets()[0] == 192 && ipv4.octets()[1] == 0 && ipv4.octets()[2] == 0)
}
IpAddr::V6(ipv6) => {
!ipv6.is_loopback()
&& !ipv6.is_unspecified()
                // fe80::/10: link-local unicast
                && (ipv6.segments()[0] & 0xffc0) != 0xfe80
                // fc00::/7: unique local addresses
                && (ipv6.segments()[0] & 0xfe00) != 0xfc00
}
}
}
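/// Convenience wrapper: `None` if the URL passes the SSRF check, otherwise
/// `Some` error result ready to return to the MCP client.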
fn validate_url_ssrf(url: &str) -> Option<ToolCallResult> {
match is_url_safe(url) {
        Ok(true) => None,
        Ok(false) => Some(ToolCallResult::error(format!(
            "SSRF protection: URL '{}' is not allowed (private IP, localhost, or blocked endpoint)",
            url
        ))),
Err(e) => Some(ToolCallResult::error(format!("Invalid URL: {}", e))),
}
}
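/// A single MCP tool: a name, a human-readable description, and a JSON Schema
/// for its input. `definition()` bundles the three for tool listing.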
pub trait McpTool: Send + Sync {
fn name(&self) -> &str;
fn description(&self) -> &str;
fn input_schema(&self) -> Value;
fn definition(&self) -> McpToolDefinition {
McpToolDefinition {
name: self.name().to_string(),
description: self.description().to_string(),
input_schema: self.input_schema(),
}
}
}
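/// Registry of all MCP tools, responsible for listing tool definitions and
/// dispatching `execute` calls to the matching handler.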
pub struct ToolRegistry {
tools: HashMap<String, Box<dyn McpTool>>,
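    // Currently unused: every call launches a fresh browser instead
    // (see get_or_create_browser).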
#[allow(dead_code)]
browser: Arc<RwLock<Option<BrowserController>>>,
}
impl ToolRegistry {
pub fn new() -> Self {
let mut registry = Self {
tools: HashMap::new(),
browser: Arc::new(RwLock::new(None)),
};
registry.register(Box::new(WebNavigateTool));
registry.register(Box::new(WebScreenshotTool));
registry.register(Box::new(WebPdfTool));
registry.register(Box::new(WebExtractContentTool));
registry.register(Box::new(WebExtractLinksTool));
registry.register(Box::new(WebExtractMetadataTool));
registry.register(Box::new(WebExecuteJsTool));
registry.register(Box::new(WebCaptureMhtmlTool));
registry.register(Box::new(TriangulateSourcesTool));
registry.register(Box::new(VerifyClaimTool));
registry.register(Box::new(CheckSourceQualityTool));
registry
}
pub fn register(&mut self, tool: Box<dyn McpTool>) {
self.tools.insert(tool.name().to_string(), tool);
}
pub fn definitions(&self) -> Vec<McpToolDefinition> {
self.tools.values().map(|t| t.definition()).collect()
}
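    /// Dispatches a tool call by name. Research tools run directly; browser
    /// tools first obtain a `BrowserController`.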
#[instrument(skip(self, args))]
pub async fn execute(&self, name: &str, args: Value) -> ToolCallResult {
info!("Executing tool: {}", name);
if !self.tools.contains_key(name) {
return ToolCallResult::error(format!("Tool not found: {}", name));
}
        // Research tools need no browser; dispatch them before paying the
        // cost (and risk) of launching one.
        match name {
            "triangulate_sources" => return self.execute_triangulate_sources(args).await,
            "verify_claim" => return self.execute_verify_claim(args).await,
            "check_source_quality" => return self.execute_check_source_quality(args).await,
            _ => {}
        }
        let browser = match self.get_or_create_browser().await {
            Ok(b) => b,
            Err(e) => return ToolCallResult::error(format!("Failed to create browser: {}", e)),
        };
        match name {
            "web_navigate" => self.execute_navigate(&browser, args).await,
            "web_screenshot" => self.execute_screenshot(&browser, args).await,
            "web_pdf" => self.execute_pdf(&browser, args).await,
            "web_extract_content" => self.execute_extract_content(&browser, args).await,
            "web_extract_links" => self.execute_extract_links(&browser, args).await,
            "web_extract_metadata" => self.execute_extract_metadata(&browser, args).await,
            "web_execute_js" => self.execute_js(&browser, args).await,
            "web_capture_mhtml" => self.execute_capture_mhtml(&browser, args).await,
            _ => ToolCallResult::error(format!("Unknown tool: {}", name)),
        }
}
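    /// Launches a new `BrowserController` per call; the cached `browser`
    /// field is not yet consulted, so there is no instance reuse.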
async fn get_or_create_browser(&self) -> Result<BrowserController> {
BrowserController::new().await
}
async fn execute_navigate(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
match browser.navigate(url).await {
Ok(page) => {
let current_url = page.url().await;
ToolCallResult::text(format!("Successfully navigated to: {}", current_url))
}
Err(e) => {
error!("Navigation failed: {}", e);
ToolCallResult::error(format!("Navigation failed: {}", e))
}
}
}
async fn execute_screenshot(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
let full_page = args
.get("fullPage")
.and_then(|v| v.as_bool())
.unwrap_or(true);
let format_str = args.get("format").and_then(|v| v.as_str()).unwrap_or("png");
let format = match format_str {
"jpeg" | "jpg" => CaptureFormat::Jpeg,
"webp" => CaptureFormat::Webp,
_ => CaptureFormat::Png,
};
match browser.navigate(url).await {
Ok(page) => {
let options = CaptureOptions {
format,
full_page,
as_base64: true,
..Default::default()
};
match PageCapture::capture(&page, &options).await {
Ok(result) => {
let base64 = result.base64.clone().unwrap_or_else(|| result.to_base64());
ToolCallResult::image(base64, result.mime_type())
}
Err(e) => ToolCallResult::error(format!("Screenshot failed: {}", e)),
}
}
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
async fn execute_pdf(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
match browser.navigate(url).await {
Ok(page) => {
let options = CaptureOptions::pdf();
match PageCapture::capture(&page, &options).await {
Ok(result) => {
let base64 = result.to_base64();
ToolCallResult::multi(vec![
ToolContent::text(format!("PDF generated: {} bytes", result.size)),
ToolContent::Resource {
uri: format!("pdf://{}", url),
resource: crate::mcp::types::ResourceContent {
mime_type: "application/pdf".to_string(),
text: None,
blob: Some(base64),
},
},
])
}
Err(e) => ToolCallResult::error(format!("PDF generation failed: {}", e)),
}
}
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
async fn execute_extract_content(
&self,
browser: &BrowserController,
args: Value,
) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
let selector = args.get("selector").and_then(|v| v.as_str());
let format = args
.get("format")
.and_then(|v| v.as_str())
.unwrap_or("markdown");
match browser.navigate(url).await {
Ok(page) => {
let content = if let Some(sel) = selector {
ContentExtractor::extract_from_selector(&page, sel).await
} else {
ContentExtractor::extract_main_content(&page).await
};
match content {
Ok(c) => {
let output = match format {
"text" => c.text,
"html" => c.html,
_ => c.markdown.unwrap_or(c.text),
};
ToolCallResult::text(output)
}
Err(e) => ToolCallResult::error(format!("Content extraction failed: {}", e)),
}
}
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
async fn execute_extract_links(
&self,
browser: &BrowserController,
args: Value,
) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
let link_type = args.get("type").and_then(|v| v.as_str());
let selector = args.get("selector").and_then(|v| v.as_str());
match browser.navigate(url).await {
Ok(page) => {
let links = if let Some(sel) = selector {
LinkExtractor::extract_from_selector(&page, sel).await
} else {
match link_type {
Some("internal") => LinkExtractor::extract_internal(&page).await,
Some("external") => LinkExtractor::extract_external(&page).await,
_ => LinkExtractor::extract_all(&page).await,
}
};
match links {
Ok(links) => {
let json = serde_json::to_string_pretty(&links)
.unwrap_or_else(|_| "[]".to_string());
ToolCallResult::text(json)
}
Err(e) => ToolCallResult::error(format!("Link extraction failed: {}", e)),
}
}
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
async fn execute_extract_metadata(
&self,
browser: &BrowserController,
args: Value,
) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
match browser.navigate(url).await {
Ok(page) => match MetadataExtractor::extract(&page).await {
Ok(meta) => {
let json =
serde_json::to_string_pretty(&meta).unwrap_or_else(|_| "{}".to_string());
ToolCallResult::text(json)
}
Err(e) => ToolCallResult::error(format!("Metadata extraction failed: {}", e)),
},
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
async fn execute_js(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
let script = match args.get("script").and_then(|v| v.as_str()) {
Some(s) => s,
None => return ToolCallResult::error("Missing required parameter: script"),
};
match browser.navigate(url).await {
Ok(page) => match page.page.evaluate(script).await {
Ok(result) => {
let value: Value = result.into_value().unwrap_or(Value::Null);
let output =
serde_json::to_string_pretty(&value).unwrap_or_else(|_| "null".to_string());
ToolCallResult::text(output)
}
Err(e) => ToolCallResult::error(format!("JavaScript execution failed: {}", e)),
},
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
async fn execute_capture_mhtml(
&self,
browser: &BrowserController,
args: Value,
) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
match browser.navigate(url).await {
Ok(page) => match PageCapture::mhtml(&page).await {
Ok(result) => {
let base64 = result.to_base64();
ToolCallResult::multi(vec![
ToolContent::text(format!("MHTML captured: {} bytes", result.size)),
ToolContent::Resource {
uri: format!("mhtml://{}", url),
resource: crate::mcp::types::ResourceContent {
mime_type: "multipart/related".to_string(),
text: None,
blob: Some(base64),
},
},
])
}
Err(e) => ToolCallResult::error(format!("MHTML capture failed: {}", e)),
},
Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
}
}
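    /// Classifies each source URL into a quality tier and reports whether the
    /// set satisfies the 3+ independent source triangulation requirement.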
#[instrument(skip(self, args))]
async fn execute_triangulate_sources(&self, args: Value) -> ToolCallResult {
let urls: Vec<String> = match args.get("urls") {
Some(Value::Array(arr)) => arr
.iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.collect(),
_ => {
return ToolCallResult::error("Missing required parameter: urls (array of strings)")
}
};
if urls.is_empty() {
return ToolCallResult::error("urls array cannot be empty");
}
for url in &urls {
if let Some(err) = validate_url_ssrf(url) {
return err;
}
}
let config = ResearchConfig::default();
let engine = TriangulationEngine::new(config);
let (meets_requirement, message) = engine.quick_verify(&urls);
let mut source_details: Vec<Value> = Vec::new();
for url in &urls {
let quality = engine.check_source(url);
source_details.push(json!({
"url": url,
"tier": format!("{:?}", quality.tier),
"tier_weight": quality.tier.weight(),
"domain": quality.domain,
"confidence": quality.confidence,
"reasons": quality.reasons,
}));
}
let result = json!({
"meets_triangulation": meets_requirement,
"message": message,
"min_sources_required": 3,
"sources_provided": urls.len(),
"source_details": source_details,
"recommendation": if meets_requirement {
"Sources meet triangulation requirements. Proceed with verification."
} else {
"Add more high-quality sources (Tier 1 or Tier 2) to meet triangulation requirements."
}
});
ToolCallResult::text(
serde_json::to_string_pretty(&result).unwrap_or_else(|_| "{}".to_string()),
)
}
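    /// Runs the full triangulation engine against a claim: per-source tier
    /// classification, support/refute tallies, and consensus analysis.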
#[instrument(skip(self, args))]
async fn execute_verify_claim(&self, args: Value) -> ToolCallResult {
let query = match args.get("query").and_then(|v| v.as_str()) {
Some(q) => q.to_string(),
None => return ToolCallResult::error("Missing required parameter: query"),
};
let urls: Vec<String> = match args.get("urls") {
Some(Value::Array(arr)) => arr
.iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.collect(),
_ => {
return ToolCallResult::error("Missing required parameter: urls (array of strings)")
}
};
let contents: Vec<(String, Option<String>, Option<bool>)> = match args.get("contents") {
Some(Value::Array(arr)) => arr
.iter()
.filter_map(|v| {
if let Value::Array(item) = v {
let url = item.first()?.as_str()?.to_string();
let content = item.get(1).and_then(|c| c.as_str()).map(|s| s.to_string());
let supports = item.get(2).and_then(|s| s.as_bool());
Some((url, content, supports))
} else {
None
}
})
.collect(),
            _ => Vec::new(),
        };
for url in &urls {
if let Some(err) = validate_url_ssrf(url) {
return err;
}
}
let preset = args
.get("preset")
.and_then(|v| v.as_str())
.unwrap_or("default");
let config = match preset {
"strict" => ResearchConfig::strict(),
"permissive" => ResearchConfig::permissive(),
_ => ResearchConfig::default(),
};
let engine = TriangulationEngine::new(config);
let result = engine.research_with_urls(&query, &urls, &contents);
let response = json!({
"verification_status": format!("{:?}", result.status),
"status_description": result.status.description(),
"is_verified": result.is_verified(),
"confidence": result.confidence,
"query": result.query,
"metrics": {
"total_sources": result.metrics.total_sources,
"accessible_sources": result.metrics.accessible_sources,
"supporting_sources": result.metrics.supporting_sources,
"refuting_sources": result.metrics.refuting_sources,
"neutral_sources": result.metrics.neutral_sources,
"tier1_count": result.metrics.tier1_count,
"tier2_count": result.metrics.tier2_count,
"tier3_count": result.metrics.tier3_count,
"average_confidence": result.metrics.average_confidence,
"meets_triangulation": result.metrics.meets_triangulation(),
},
"sources": result.sources.iter().map(|s| json!({
"url": s.url,
"title": s.title,
"tier": format!("{:?}", s.quality.tier),
"supports_claim": s.supports_claim,
"relevance_score": s.relevance_score,
"content_snippet": s.content_snippet,
"is_usable": s.is_usable(),
})).collect::<Vec<_>>(),
"consensus": {
"status": format!("{:?}", result.consensus.status),
"confidence": result.consensus.confidence,
"consensus_answer": result.consensus.consensus_answer,
"discrepancy_count": result.consensus.discrepancies.len(),
},
"timestamp": result.timestamp.to_rfc3339(),
});
ToolCallResult::text(
serde_json::to_string_pretty(&response).unwrap_or_else(|_| "{}".to_string()),
)
}
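    /// Classifies a single URL into a source tier and returns its weight,
    /// confidence, and a usage recommendation.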
#[instrument(skip(self, args))]
async fn execute_check_source_quality(&self, args: Value) -> ToolCallResult {
let url = match args.get("url").and_then(|v| v.as_str()) {
Some(u) => u,
None => return ToolCallResult::error("Missing required parameter: url"),
};
if let Some(err) = validate_url_ssrf(url) {
return err;
}
let classifier = TierClassifier::default();
let quality = classifier.classify(url);
let result = json!({
"url": url,
"tier": format!("{:?}", quality.tier),
"tier_description": match quality.tier {
SourceTier::Tier1 => "Authoritative (official docs, .gov, .edu, peer-reviewed)",
SourceTier::Tier2 => "Reputable (Wikipedia, major news, Stack Overflow)",
SourceTier::Tier3 => "Low quality (forums, social media, unknown)",
SourceTier::Unknown => "Unknown (could not classify)",
},
"tier_weight": quality.tier.weight(),
"domain": quality.domain,
"base_confidence": quality.confidence,
"reasons": quality.reasons,
"is_authoritative": quality.tier == SourceTier::Tier1,
"is_reputable": matches!(quality.tier, SourceTier::Tier1 | SourceTier::Tier2),
"recommendation": match quality.tier {
SourceTier::Tier1 => "Excellent source. High priority for triangulation.",
SourceTier::Tier2 => "Good source. Acceptable for triangulation.",
SourceTier::Tier3 => "Use with caution. Seek additional Tier 1/2 sources.",
SourceTier::Unknown => "Unknown quality. Verify manually before using.",
}
});
ToolCallResult::text(
serde_json::to_string_pretty(&result).unwrap_or_else(|_| "{}".to_string()),
)
}
}
impl Default for ToolRegistry {
fn default() -> Self {
Self::new()
}
}
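// Unit structs below implement `McpTool` for each tool; the JSON Schemas they
// return are what MCP clients see via `tools/list`.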
struct WebNavigateTool;
impl McpTool for WebNavigateTool {
fn name(&self) -> &str {
"web_navigate"
}
fn description(&self) -> &str {
"Navigate to a URL using a headless browser"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to navigate to"
},
"waitFor": {
"type": "string",
"description": "CSS selector to wait for before returning",
"optional": true
}
},
"required": ["url"]
})
}
}
struct WebScreenshotTool;
impl McpTool for WebScreenshotTool {
fn name(&self) -> &str {
"web_screenshot"
}
fn description(&self) -> &str {
"Capture a screenshot of a web page"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to capture"
},
"fullPage": {
"type": "boolean",
"description": "Capture full page (default: true)",
"default": true
},
"format": {
"type": "string",
"enum": ["png", "jpeg", "webp"],
"description": "Image format (default: png)",
"default": "png"
},
"selector": {
"type": "string",
"description": "CSS selector to capture specific element"
}
},
"required": ["url"]
})
}
}
struct WebPdfTool;
impl McpTool for WebPdfTool {
fn name(&self) -> &str {
"web_pdf"
}
fn description(&self) -> &str {
"Generate a PDF of a web page"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to convert to PDF"
},
"printBackground": {
"type": "boolean",
"description": "Print background graphics (default: true)",
"default": true
}
},
"required": ["url"]
})
}
}
struct WebExtractContentTool;
impl McpTool for WebExtractContentTool {
fn name(&self) -> &str {
"web_extract_content"
}
fn description(&self) -> &str {
"Extract main content from a web page as text or markdown"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to extract content from"
},
"selector": {
"type": "string",
"description": "CSS selector to extract from (default: auto-detect main content)"
},
"format": {
"type": "string",
"enum": ["text", "markdown", "html"],
"description": "Output format (default: markdown)",
"default": "markdown"
}
},
"required": ["url"]
})
}
}
struct WebExtractLinksTool;
impl McpTool for WebExtractLinksTool {
fn name(&self) -> &str {
"web_extract_links"
}
fn description(&self) -> &str {
"Extract all links from a web page with context"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to extract links from"
},
"type": {
"type": "string",
"enum": ["all", "internal", "external"],
"description": "Type of links to extract (default: all)",
"default": "all"
},
"selector": {
"type": "string",
"description": "CSS selector to extract links from"
}
},
"required": ["url"]
})
}
}
struct WebExtractMetadataTool;
impl McpTool for WebExtractMetadataTool {
fn name(&self) -> &str {
"web_extract_metadata"
}
fn description(&self) -> &str {
"Extract page metadata (title, description, Open Graph, Twitter Card, etc.)"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to extract metadata from"
}
},
"required": ["url"]
})
}
}
struct WebExecuteJsTool;
impl McpTool for WebExecuteJsTool {
fn name(&self) -> &str {
"web_execute_js"
}
fn description(&self) -> &str {
"Execute JavaScript on a web page and return the result"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to execute JavaScript on"
},
"script": {
"type": "string",
"description": "The JavaScript code to execute"
}
},
"required": ["url", "script"]
})
}
}
struct WebCaptureMhtmlTool;
impl McpTool for WebCaptureMhtmlTool {
fn name(&self) -> &str {
"web_capture_mhtml"
}
fn description(&self) -> &str {
"Capture a complete web page as an MHTML archive"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to capture"
}
},
"required": ["url"]
})
}
}
struct TriangulateSourcesTool;
impl McpTool for TriangulateSourcesTool {
fn name(&self) -> &str {
"triangulate_sources"
}
fn description(&self) -> &str {
"Check if sources meet triangulation requirements (CONS-006: 3+ independent sources with quality tiers)"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"urls": {
"type": "array",
"items": { "type": "string" },
"description": "Array of source URLs to validate for triangulation",
"minItems": 1
}
},
"required": ["urls"]
})
}
}
struct VerifyClaimTool;
impl McpTool for VerifyClaimTool {
fn name(&self) -> &str {
"verify_claim"
}
fn description(&self) -> &str {
"Verify a claim using triangulated sources (3+ independent sources) with consensus analysis"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The claim or query to verify"
},
"urls": {
"type": "array",
"items": { "type": "string" },
"description": "Array of source URLs to use for verification",
"minItems": 1
},
"contents": {
"type": "array",
"description": "Optional array of [url, content_snippet, supports_claim] tuples",
"items": {
"type": "array",
"items": [
{ "type": "string", "description": "URL" },
{ "type": ["string", "null"], "description": "Content snippet" },
{ "type": ["boolean", "null"], "description": "Whether content supports the claim" }
]
}
},
"preset": {
"type": "string",
"enum": ["default", "strict", "permissive"],
"description": "Configuration preset (default: standard 3+ sources, strict: 5+ sources Tier1 only, permissive: 2+ sources)",
"default": "default"
}
},
"required": ["query", "urls"]
})
}
}
struct CheckSourceQualityTool;
impl McpTool for CheckSourceQualityTool {
fn name(&self) -> &str {
"check_source_quality"
}
fn description(&self) -> &str {
"Assess the quality tier (Tier1/2/3) and reliability of a source URL"
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to assess for quality"
}
},
"required": ["url"]
})
}
}
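/// Names of every tool registered by `ToolRegistry::new`, in registration order.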
pub const AVAILABLE_TOOLS: &[&str] = &[
"web_navigate",
"web_screenshot",
"web_pdf",
"web_extract_content",
"web_extract_links",
"web_extract_metadata",
"web_execute_js",
"web_capture_mhtml",
"triangulate_sources",
"verify_claim",
"check_source_quality",
];
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_tool_registry_new() {
let registry = ToolRegistry::new();
        assert!(registry.tools.len() >= 11);
}
#[test]
fn test_tool_definitions() {
let registry = ToolRegistry::new();
let defs = registry.definitions();
assert!(!defs.is_empty());
let nav = defs.iter().find(|d| d.name == "web_navigate");
assert!(nav.is_some());
}
#[test]
fn test_web_navigate_tool() {
let tool = WebNavigateTool;
assert_eq!(tool.name(), "web_navigate");
assert!(tool.description().contains("Navigate"));
let schema = tool.input_schema();
assert!(schema["properties"]["url"].is_object());
}
#[test]
fn test_available_tools() {
assert!(AVAILABLE_TOOLS.contains(&"web_navigate"));
assert!(AVAILABLE_TOOLS.contains(&"web_screenshot"));
assert!(AVAILABLE_TOOLS.contains(&"web_execute_js"));
}
#[test]
fn test_ssrf_allows_public_urls() {
assert!(is_url_safe("https://example.com").unwrap());
assert!(is_url_safe("https://google.com/search?q=test").unwrap());
assert!(is_url_safe("http://github.com").unwrap());
}
#[test]
fn test_ssrf_blocks_localhost() {
assert!(!is_url_safe("http://localhost").unwrap());
assert!(!is_url_safe("http://localhost:8080").unwrap());
assert!(!is_url_safe("https://localhost/api").unwrap());
assert!(!is_url_safe("http://127.0.0.1").unwrap());
assert!(!is_url_safe("http://127.0.0.1:3000").unwrap());
assert!(!is_url_safe("http://[::1]").unwrap());
assert!(!is_url_safe("http://0.0.0.0").unwrap());
}
#[test]
fn test_ssrf_blocks_private_ips() {
assert!(!is_url_safe("http://10.0.0.1").unwrap());
assert!(!is_url_safe("http://10.255.255.255").unwrap());
assert!(!is_url_safe("http://172.16.0.1").unwrap());
assert!(!is_url_safe("http://172.31.255.255").unwrap());
assert!(!is_url_safe("http://192.168.0.1").unwrap());
assert!(!is_url_safe("http://192.168.1.100").unwrap());
}
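    // Sketch of a regression test for the bracket-stripping in `is_url_safe`:
    // bracketed IPv6 literals should reach the IP range check and be rejected
    // when link-local (fe80::/10) or unique-local (fc00::/7).
    #[test]
    fn test_ssrf_blocks_bracketed_ipv6() {
        assert!(!is_url_safe("http://[fe80::1]").unwrap());
        assert!(!is_url_safe("http://[fd00::1]").unwrap());
    }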
#[test]
fn test_ssrf_blocks_cloud_metadata() {
assert!(!is_url_safe("http://169.254.169.254").unwrap());
assert!(!is_url_safe("http://169.254.169.254/latest/meta-data/").unwrap());
assert!(!is_url_safe("http://metadata.google.internal").unwrap());
assert!(!is_url_safe("http://metadata").unwrap());
}
#[test]
fn test_ssrf_blocks_internal_domains() {
assert!(!is_url_safe("http://server.internal").unwrap());
assert!(!is_url_safe("http://app.local").unwrap());
assert!(!is_url_safe("http://db.localhost").unwrap());
assert!(!is_url_safe("http://router.lan").unwrap());
assert!(!is_url_safe("http://mail.corp").unwrap());
assert!(!is_url_safe("http://nas.home").unwrap());
}
#[test]
fn test_ssrf_blocks_dangerous_schemes() {
assert!(!is_url_safe("file:///etc/passwd").unwrap());
assert!(!is_url_safe("ftp://example.com").unwrap());
assert!(!is_url_safe("gopher://example.com").unwrap());
assert!(!is_url_safe("javascript:alert(1)").unwrap_or(false));
}
#[test]
fn test_ssrf_blocks_cgnat_range() {
assert!(!is_url_safe("http://100.64.0.1").unwrap());
assert!(!is_url_safe("http://100.100.100.100").unwrap());
assert!(!is_url_safe("http://100.127.255.255").unwrap());
}
#[test]
fn test_validate_url_ssrf_returns_none_for_safe_urls() {
assert!(validate_url_ssrf("https://example.com").is_none());
assert!(validate_url_ssrf("https://github.com/repo").is_none());
}
#[test]
fn test_validate_url_ssrf_returns_error_for_unsafe_urls() {
let result = validate_url_ssrf("http://localhost:8080");
assert!(result.is_some());
let result = validate_url_ssrf("http://169.254.169.254");
assert!(result.is_some());
let result = validate_url_ssrf("http://192.168.1.1");
assert!(result.is_some());
}
#[test]
fn test_triangulate_sources_tool() {
let tool = TriangulateSourcesTool;
assert_eq!(tool.name(), "triangulate_sources");
assert!(tool.description().contains("CONS-006"));
let schema = tool.input_schema();
assert!(schema["properties"]["urls"].is_object());
assert_eq!(schema["required"][0], "urls");
}
#[test]
fn test_verify_claim_tool() {
let tool = VerifyClaimTool;
assert_eq!(tool.name(), "verify_claim");
assert!(tool.description().contains("triangulated"));
let schema = tool.input_schema();
assert!(schema["properties"]["query"].is_object());
assert!(schema["properties"]["urls"].is_object());
assert!(schema["properties"]["preset"].is_object());
}
#[test]
fn test_check_source_quality_tool() {
let tool = CheckSourceQualityTool;
assert_eq!(tool.name(), "check_source_quality");
assert!(tool.description().contains("quality"));
let schema = tool.input_schema();
assert!(schema["properties"]["url"].is_object());
assert_eq!(schema["required"][0], "url");
}
#[test]
fn test_available_tools_includes_triangulation() {
assert!(AVAILABLE_TOOLS.contains(&"triangulate_sources"));
assert!(AVAILABLE_TOOLS.contains(&"verify_claim"));
assert!(AVAILABLE_TOOLS.contains(&"check_source_quality"));
}
#[test]
fn test_tool_registry_includes_triangulation_tools() {
let registry = ToolRegistry::new();
let defs = registry.definitions();
assert!(defs.iter().any(|d| d.name == "triangulate_sources"));
assert!(defs.iter().any(|d| d.name == "verify_claim"));
assert!(defs.iter().any(|d| d.name == "check_source_quality"));
assert!(registry.tools.len() >= 11);
}
}