use async_trait::async_trait;
use rucora_core::{
error::ToolError,
tool::{Tool, ToolCategory},
};
use serde_json::{Value, json};
use std::time::Duration;
use super::security::validate_public_http_url;
/// Largest response body, in bytes, that the fetch tool accepts (5 MiB).
/// Bodies above this size are rejected with an error instead of being returned.
const MAX_RESPONSE_SIZE: usize = 5 * (1 << 20);
/// Tool that fetches a web page over HTTP and returns its body as text.
///
/// The type is a stateless unit struct, so it is cheap to copy and can be
/// printed with `{:?}` for diagnostics.
#[derive(Debug, Clone, Copy)]
pub struct WebFetchTool;
impl WebFetchTool {
pub fn new() -> Self {
Self
}
}
impl Default for WebFetchTool {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Tool for WebFetchTool {
fn name(&self) -> &str {
"web_fetch"
}
fn description(&self) -> Option<&str> {
Some("获取网页的 HTML 内容")
}
fn categories(&self) -> &'static [ToolCategory] {
&[ToolCategory::Network]
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "网页 URL"
},
"timeout": {
"type": "integer",
"description": "超时时间(秒),默认 30 秒",
"default": 30
}
},
"required": ["url"]
})
}
async fn call(&self, input: Value) -> Result<Value, ToolError> {
let url = input
.get("url")
.and_then(|v| v.as_str())
.ok_or_else(|| ToolError::Message("缺少必需的 'url' 字段".to_string()))?;
let timeout_secs = input.get("timeout").and_then(|v| v.as_u64()).unwrap_or(30);
validate_public_http_url(url, None, None).await?;
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(timeout_secs))
.redirect(reqwest::redirect::Policy::none())
.user_agent("Mozilla/5.0 (compatible; rucora/0.1)")
.build()
.map_err(|e| ToolError::Message(format!("HTTP 客户端创建失败: {e}")))?;
let response = client
.get(url)
.send()
.await
.map_err(|e| ToolError::Message(format!("获取网页失败: {e}")))?;
let status = response.status().as_u16();
let body_bytes = response
.bytes()
.await
.map_err(|e| ToolError::Message(format!("读取响应体失败: {e}")))?;
if body_bytes.len() > MAX_RESPONSE_SIZE {
return Err(ToolError::Message(format!(
"响应体过大({} 字节),超过限制({} 字节)",
body_bytes.len(),
MAX_RESPONSE_SIZE
)));
}
let body = String::from_utf8_lossy(&body_bytes).to_string();
Ok(json!({
"url": url,
"status": status,
"html": body,
"success": (200..300).contains(&status)
}))
}
}