use crate::types::*;
use reqwest::Client;
use regex::Regex;
/// Tool that fetches the content of a URL over HTTP and returns it as text.
///
/// The tool owns a single [`Client`] that is reused for every request.
/// `Debug` and `Clone` are derived so the tool can be logged and shared;
/// cloning a `reqwest::Client` is cheap because it is reference-counted
/// internally.
#[derive(Debug, Clone)]
pub struct WebFetchTool {
    /// HTTP client used to issue requests.
    client: Client,
}
impl WebFetchTool {
pub fn new() -> Self {
let client = Client::builder()
.timeout(std::time::Duration::from_secs(30))
.user_agent("Mozilla/5.0 (compatible; AgentSDK/1.0)")
.build()
.expect("Failed to create HTTP client");
Self { client }
}
pub fn name(&self) -> &str {
"WebFetch"
}
pub fn description(&self) -> &str {
"Fetch content from a URL and return it as text. Supports HTML pages, JSON APIs, and plain text. Strips HTML tags for readability."
}
pub fn input_schema(&self) -> ToolInputSchema {
ToolInputSchema {
schema_type: "object".to_string(),
properties: serde_json::json!({
"url": {
"type": "string",
"description": "The URL to fetch content from"
},
"headers": {
"type": "object",
"description": "Optional HTTP headers",
"additionalProperties": {
"type": "string"
}
}
}),
required: Some(vec!["url".to_string()]),
}
}
pub async fn execute(
&self,
input: serde_json::Value,
_context: &ToolContext,
) -> Result<ToolResult, crate::error::AgentError> {
let url = input["url"]
.as_str()
.ok_or_else(|| crate::error::AgentError::Tool("url is required".to_string()))?;
let mut request = self.client.get(url);
if let Some(headers) = input["headers"].as_object() {
for (key, value) in headers {
if let Some(value_str) = value.as_str() {
request = request.header(key, value_str);
}
}
}
let response = request.send().await.map_err(|e| {
crate::error::AgentError::Tool(format!("Error fetching {}: {}", url, e))
})?;
if !response.status().is_success() {
return Ok(ToolResult {
result_type: "text".to_string(),
tool_use_id: "".to_string(),
content: format!("HTTP {}: {}", response.status().as_u16(), response.status().canonical_reason().unwrap_or("Unknown")),
is_error: Some(true),
});
}
let content_type = response
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok())
.map(|s| s.to_string())
.unwrap_or_default();
let mut text = response.text().await.map_err(|e| {
crate::error::AgentError::Tool(format!("Error reading response: {}", e))
})?;
if content_type.contains("text/html") {
let script_regex = Regex::new(r"<script[^>]*>[\s\S]*?</script>").unwrap();
text = script_regex.replace_all(&text, "").to_string();
let style_regex = Regex::new(r"<style[^>]*>[\s\S]*?</style>").unwrap();
text = style_regex.replace_all(&text, "").to_string();
let tag_regex = Regex::new(r"<[^>]+>").unwrap();
text = tag_regex.replace_all(&text, " ").to_string();
let whitespace_regex = Regex::new(r"\s+").unwrap();
text = whitespace_regex.replace_all(&text, " ").trim().to_string();
}
if text.len() > 100000 {
text.truncate(100000);
text.push_str("\n...(truncated)");
}
if text.is_empty() {
text = "(empty response)".to_string();
}
Ok(ToolResult {
result_type: "text".to_string(),
tool_use_id: "".to_string(),
content: text,
is_error: None,
})
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fresh tool and a default context, then runs `execute` on `input`.
    async fn run_fetch(input: serde_json::Value) -> Result<ToolResult, crate::error::AgentError> {
        let fetcher = WebFetchTool::new();
        let ctx = ToolContext::default();
        fetcher.execute(input, &ctx).await
    }

    // --- Metadata and schema checks (no network) ---

    #[test]
    fn test_web_fetch_tool_name() {
        let fetcher = WebFetchTool::new();
        assert_eq!(fetcher.name(), "WebFetch");
    }

    #[test]
    fn test_web_fetch_tool_description_contains_fetch() {
        let fetcher = WebFetchTool::new();
        let lowered = fetcher.description().to_lowercase();
        assert!(lowered.contains("fetch"));
    }

    #[test]
    fn test_web_fetch_tool_has_url_in_schema() {
        let properties = WebFetchTool::new().input_schema().properties;
        assert!(properties.get("url").is_some());
    }

    #[test]
    fn test_web_fetch_tool_has_headers_in_schema() {
        let properties = WebFetchTool::new().input_schema().properties;
        assert!(properties.get("headers").is_some());
    }

    #[tokio::test]
    async fn test_web_fetch_tool_requires_url() {
        // An empty object has no "url" key, so execution must fail.
        let outcome = run_fetch(serde_json::json!({})).await;
        assert!(outcome.is_err());
    }

    // --- Tests below perform real HTTP requests and are ignored by default ---

    #[tokio::test]
    #[ignore]
    async fn test_web_fetch_tool_fetches_plain_text() {
        let outcome = run_fetch(serde_json::json!({
            "url": "https://httpbin.org/robots.txt"
        }))
        .await;
        assert!(outcome.is_ok());
        let fetched = outcome.unwrap();
        assert!(!fetched.content.is_empty());
    }

    #[tokio::test]
    #[ignore]
    async fn test_web_fetch_tool_strips_html_tags() {
        let outcome = run_fetch(serde_json::json!({
            "url": "https://httpbin.org/html"
        }))
        .await;
        assert!(outcome.is_ok());
        let fetched = outcome.unwrap();
        for marker in ["<html", "<body"] {
            assert!(!fetched.content.contains(marker));
        }
    }

    #[tokio::test]
    #[ignore]
    async fn test_web_fetch_tool_returns_error_for_invalid_url() {
        let outcome = run_fetch(serde_json::json!({
            "url": "https://this-domain-does-not-exist-123456.invalid/"
        }))
        .await;
        assert!(outcome.is_err());
    }
}