ai-agent-sdk 0.5.0

use crate::types::*;
use reqwest::Client;
use regex::Regex;

/// Tool that fetches the content of a URL over HTTP and returns it as text.
pub struct WebFetchTool {
    /// HTTP client built once in `new` and reused by every `execute` call.
    client: Client,
}

impl WebFetchTool {
    /// Maximum number of bytes of response text returned by `execute` before
    /// the content is cut and a truncation marker is appended.
    const MAX_CONTENT_BYTES: usize = 100_000;

    /// Creates the tool with an HTTP client that uses a 30 second timeout and
    /// a fixed `AgentSDK` user agent.
    ///
    /// # Panics
    ///
    /// Panics if the underlying HTTP client cannot be constructed.
    pub fn new() -> Self {
        let client = Client::builder()
            .timeout(std::time::Duration::from_secs(30))
            .user_agent("Mozilla/5.0 (compatible; AgentSDK/1.0)")
            .build()
            .expect("Failed to create HTTP client");
        Self { client }
    }

    /// Returns the name under which this tool is exposed.
    pub fn name(&self) -> &str {
        "WebFetch"
    }

    /// Returns a human-readable description of what the tool does.
    pub fn description(&self) -> &str {
        "Fetch content from a URL and return it as text. Supports HTML pages, JSON APIs, and plain text. Strips HTML tags for readability."
    }

    /// Returns the JSON schema of the tool input: a required `url` string and
    /// an optional `headers` object whose values are strings.
    pub fn input_schema(&self) -> ToolInputSchema {
        ToolInputSchema {
            schema_type: "object".to_string(),
            properties: serde_json::json!({
                "url": {
                    "type": "string",
                    "description": "The URL to fetch content from"
                },
                "headers": {
                    "type": "object",
                    "description": "Optional HTTP headers",
                    "additionalProperties": {
                        "type": "string"
                    }
                }
            }),
            required: Some(vec!["url".to_string()]),
        }
    }

    /// Fetches `input["url"]` with a GET request and returns the body as text.
    ///
    /// String-valued entries of `input["headers"]` are sent as request
    /// headers; non-string values are skipped. Responses whose content type
    /// contains `text/html` have script blocks, style blocks and tags removed
    /// and whitespace collapsed. Text longer than `MAX_CONTENT_BYTES` is cut
    /// and marked as truncated, and an empty body is reported as
    /// `(empty response)`.
    ///
    /// A response with a non-success HTTP status is returned as `Ok` with
    /// `is_error` set to `Some(true)` and the status line as content.
    ///
    /// # Errors
    ///
    /// Returns `AgentError::Tool` when `url` is missing or not a string, when
    /// sending the request fails, or when the response body cannot be read.
    pub async fn execute(
        &self,
        input: serde_json::Value,
        _context: &ToolContext,
    ) -> Result<ToolResult, crate::error::AgentError> {
        let url = input["url"]
            .as_str()
            .ok_or_else(|| crate::error::AgentError::Tool("url is required".to_string()))?;

        // Build request with optional headers
        let mut request = self.client.get(url);

        if let Some(headers) = input["headers"].as_object() {
            for (key, value) in headers {
                if let Some(value_str) = value.as_str() {
                    request = request.header(key, value_str);
                }
            }
        }

        let response = request.send().await.map_err(|e| {
            crate::error::AgentError::Tool(format!("Error fetching {}: {}", url, e))
        })?;

        let status = response.status();
        if !status.is_success() {
            return Ok(ToolResult {
                result_type: "text".to_string(),
                tool_use_id: "".to_string(),
                content: format!(
                    "HTTP {}: {}",
                    status.as_u16(),
                    status.canonical_reason().unwrap_or("Unknown")
                ),
                is_error: Some(true),
            });
        }

        // Media types are case-insensitive, so normalise before matching.
        // The header must be read before `text()` consumes the response.
        let content_type = response
            .headers()
            .get("content-type")
            .and_then(|v| v.to_str().ok())
            .map(|s| s.to_ascii_lowercase())
            .unwrap_or_default();

        let mut text = response.text().await.map_err(|e| {
            crate::error::AgentError::Tool(format!("Error reading response: {}", e))
        })?;

        // Strip HTML tags for readability
        if content_type.contains("text/html") {
            text = Self::strip_html(&text);
        }

        // Truncate very large responses
        Self::truncate_long(&mut text);

        if text.is_empty() {
            text = "(empty response)".to_string();
        }

        Ok(ToolResult {
            result_type: "text".to_string(),
            tool_use_id: "".to_string(),
            content: text,
            is_error: None,
        })
    }

    /// Removes script blocks, style blocks and all remaining tags from `html`,
    /// then collapses runs of whitespace into single spaces and trims the ends.
    fn strip_html(html: &str) -> String {
        // `(?i)`: HTML element names are case-insensitive, so `<SCRIPT>` and
        // `<Style>` blocks must be dropped as well.
        let script_regex = Regex::new(r"(?i)<script[^>]*>[\s\S]*?</script>")
            .expect("static script regex is valid");
        let style_regex = Regex::new(r"(?i)<style[^>]*>[\s\S]*?</style>")
            .expect("static style regex is valid");
        let tag_regex = Regex::new(r"<[^>]+>").expect("static tag regex is valid");
        let whitespace_regex = Regex::new(r"\s+").expect("static whitespace regex is valid");

        let without_scripts = script_regex.replace_all(html, "");
        let without_styles = style_regex.replace_all(&without_scripts, "");
        // Tags become spaces so that adjacent text nodes do not run together.
        let without_tags = tag_regex.replace_all(&without_styles, " ");
        whitespace_regex
            .replace_all(&without_tags, " ")
            .trim()
            .to_string()
    }

    /// Cuts `text` to at most `MAX_CONTENT_BYTES` bytes and appends a
    /// truncation marker; shorter text is left untouched.
    fn truncate_long(text: &mut String) {
        if text.len() <= Self::MAX_CONTENT_BYTES {
            return;
        }

        // `String::truncate` panics when the cut falls inside a multi-byte
        // character, so back up to the nearest char boundary first. Index 0 is
        // always a boundary, which bounds the loop.
        let mut cut = Self::MAX_CONTENT_BYTES;
        while !text.is_char_boundary(cut) {
            cut -= 1;
        }
        text.truncate(cut);
        text.push_str("\n...(truncated)");
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Executes a freshly built tool against `input` using a default context.
    async fn run(input: serde_json::Value) -> Result<ToolResult, crate::error::AgentError> {
        let tool = WebFetchTool::new();
        let ctx = ToolContext::default();
        tool.execute(input, &ctx).await
    }

    #[test]
    fn test_web_fetch_tool_name() {
        assert_eq!(WebFetchTool::new().name(), "WebFetch");
    }

    #[test]
    fn test_web_fetch_tool_description_contains_fetch() {
        let description = WebFetchTool::new().description().to_lowercase();
        assert!(description.contains("fetch"));
    }

    #[test]
    fn test_web_fetch_tool_has_url_in_schema() {
        let properties = WebFetchTool::new().input_schema().properties;
        assert!(properties.get("url").is_some());
    }

    #[test]
    fn test_web_fetch_tool_has_headers_in_schema() {
        let properties = WebFetchTool::new().input_schema().properties;
        assert!(properties.get("headers").is_some());
    }

    #[tokio::test]
    async fn test_web_fetch_tool_requires_url() {
        // An input object without a `url` key must be rejected.
        let outcome = run(serde_json::json!({})).await;
        assert!(outcome.is_err());
    }

    #[tokio::test]
    #[ignore] // Requires network access to httpbin
    async fn test_web_fetch_tool_fetches_plain_text() {
        // Plain-text endpoint: the body should come back non-empty.
        let outcome = run(serde_json::json!({
            "url": "https://httpbin.org/robots.txt"
        }))
        .await;

        assert!(outcome.is_ok());
        let fetched = outcome.unwrap();
        assert!(!fetched.content.is_empty());
    }

    #[tokio::test]
    #[ignore] // Requires network access to httpbin
    async fn test_web_fetch_tool_strips_html_tags() {
        // HTML endpoint: markup must not survive in the returned content.
        let outcome = run(serde_json::json!({
            "url": "https://httpbin.org/html"
        }))
        .await;

        assert!(outcome.is_ok());
        let fetched = outcome.unwrap();
        for marker in ["<html", "<body"] {
            assert!(!fetched.content.contains(marker));
        }
    }

    #[tokio::test]
    #[ignore] // Requires network access
    async fn test_web_fetch_tool_returns_error_for_invalid_url() {
        // A host that cannot be resolved should surface as an error.
        let outcome = run(serde_json::json!({
            "url": "https://this-domain-does-not-exist-123456.invalid/"
        }))
        .await;

        assert!(outcome.is_err());
    }
}