use crate::error::{Result, ToolError};
use crate::tools::{Tool, ToolParameters, ToolResult};
use futures::future::BoxFuture;
use reqwest::Client;
use serde_json::Value;
use std::sync::OnceLock;
use std::time::Duration;
static CLIENT: OnceLock<Client> = OnceLock::new();
fn build_client() -> &'static Client {
CLIENT.get_or_init(|| {
Client::builder()
.user_agent(
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) \
AppleWebKit/537.36 (KHTML, like Gecko) \
Chrome/131.0.0.0 Safari/537.36",
)
.timeout(Duration::from_secs(20))
.redirect(reqwest::redirect::Policy::limited(5))
.build()
.unwrap_or_else(|e| {
tracing::error!("Failed to build HTTP client: {}, using default", e);
Client::new()
})
})
}
/// Web fetch tool that returns page text, raw JSON, or an image encoded as a
/// base64 data URI.
// NOTE(review): `dead_code` is allowed presumably because not every field is
// read by the code in this file — confirm which fields are still needed.
#[allow(dead_code)]
pub struct WebFetchToolEnhanced {
/// HTTP client; `new` clones it from the shared client returned by `build_client`.
client: Client,
/// Maximum content length; `new` initialises it to 50_000.
max_content_length: usize,
/// Column width handed to `html2text` when converting HTML to plain text.
text_width: usize,
/// Timeout in seconds; `new` initialises it to 20.
timeout_secs: u64,
}
impl WebFetchToolEnhanced {
/// Creates a tool with the default configuration: a clone of the shared HTTP
/// client, `max_content_length` of 50_000, `text_width` of 120 and
/// `timeout_secs` of 20.
pub fn new() -> Self {
    let client = build_client().clone();
    Self {
        client,
        max_content_length: 50_000,
        text_width: 120,
        timeout_secs: 20,
    }
}
pub fn with_max_content_length(mut self, n: usize) -> Self {
self.max_content_length = n;
self
}
pub fn with_text_width(mut self, width: usize) -> Self {
self.text_width = width;
self
}
pub fn with_timeout(mut self, secs: u64) -> Self {
self.timeout_secs = secs;
self
}
/// Returns `true` when `content_type` names an `image/*` media type.
///
/// Media type names are case-insensitive, so `Image/PNG` is accepted just like
/// `image/png`. Leading whitespace in the header value is ignored; anything
/// after the `image/` prefix (subtype, parameters) is not inspected.
fn is_image_content_type(content_type: &str) -> bool {
    const PREFIX: &str = "image/";
    // `get` yields `None` for values shorter than the prefix (or when the cut
    // would split a multi-byte character), which correctly reads as "not an image".
    content_type
        .trim_start()
        .get(..PREFIX.len())
        .is_some_and(|head| head.eq_ignore_ascii_case(PREFIX))
}
/// Returns `true` when the path portion of `url` ends in a known image extension.
///
/// The comparison is case-insensitive. The query string and fragment are
/// ignored, so `https://host/a.png?w=200` is recognised as an image while
/// `https://host/page?file=a.png` is not.
fn is_image_url(url: &str) -> bool {
    const IMAGE_EXTENSIONS: [&str; 8] = [
        ".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".svg", ".ico",
    ];
    // Everything from the first `?` or `#` onward is query/fragment, not path.
    let path = url
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(url)
        .to_ascii_lowercase();
    IMAGE_EXTENSIONS.iter().any(|ext| path.ends_with(*ext))
}
/// Renders `html` as plain text wrapped at `self.text_width` columns.
///
/// When `html2text` reports an error, a warning is logged and the markup is
/// reduced by deleting everything that looks like a tag (`<...>`).
fn html_to_text(&self, html: &str) -> String {
    html2text::from_read(html.as_bytes(), self.text_width).unwrap_or_else(|err| {
        tracing::warn!(
            "HTML to text conversion failed: {}, falling back to tag removal",
            err
        );
        // The pattern is a fixed literal, so compilation cannot fail at runtime.
        let tag_pattern = regex::Regex::new(r"<[^>]+>").unwrap();
        tag_pattern.replace_all(html, "").into_owned()
    })
}
/// Returns `true` when `content_type` denotes HTML or XHTML, i.e. a body that
/// should be converted to plain text before being returned.
///
/// Media type names are case-insensitive, so the header value is lower-cased
/// before matching; parameters such as `; charset=UTF-8` do not interfere.
fn needs_html_conversion(content_type: &str) -> bool {
    let normalized = content_type.to_ascii_lowercase();
    normalized.contains("text/html") || normalized.contains("application/xhtml")
}
/// Limits `content` to at most `max_len` characters (Unicode scalar values,
/// not bytes).
///
/// Content that already fits is returned unchanged; longer content is cut
/// after `max_len` characters and a truncation marker is appended.
fn truncate_content(content: &str, max_len: usize) -> String {
    // The character at index `max_len` exists only when the content is too
    // long; its byte offset is exactly where the kept prefix ends.
    match content.char_indices().nth(max_len) {
        None => content.to_string(),
        Some((cut, _)) => format!("{}\n\n[... content truncated ...]", &content[..cut]),
    }
}
#[allow(dead_code)]
async fn download_image_as_base64(&self, url: &str) -> Result<(String, String, usize)> {
let response = self.client.get(url).send().await.map_err(|e| {
crate::error::ReactError::Tool(ToolError::ExecutionFailed {
tool: "web_fetch_enhanced".into(),
message: format!("Failed to download image: {}", e),
})
})?;
if !response.status().is_success() {
return Err(crate::error::ReactError::Tool(ToolError::ExecutionFailed {
tool: "web_fetch_enhanced".into(),
message: format!("HTTP error: {}", response.status()),
}));
}
let content_type = response
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok())
.unwrap_or("image/jpeg")
.to_string();
let mime_subtype = content_type.split('/').nth(1).unwrap_or("png");
let bytes = response.bytes().await.map_err(|e| {
crate::error::ReactError::Tool(ToolError::ExecutionFailed {
tool: "web_fetch_enhanced".into(),
message: format!("Failed to read image data: {}", e),
})
})?;
let size = bytes.len();
use base64::Engine;
let base64_data = base64::engine::general_purpose::STANDARD.encode(&bytes);
let data_uri = format!("data:image/{};base64,{}", mime_subtype, base64_data);
Ok((data_uri, content_type.to_string(), size))
}
}
/// `Default` delegates to `WebFetchToolEnhanced::new`, so both produce the
/// same configuration.
impl Default for WebFetchToolEnhanced {
fn default() -> Self {
Self::new()
}
}
impl Tool for WebFetchToolEnhanced {
/// Returns the tool's identifier, `web_fetch_enhanced`. The same string is
/// used as the `tool` field of the execution errors raised in this file.
fn name(&self) -> &str {
"web_fetch_enhanced"
}
/// Returns the human-readable summary of the tool and its parameters
/// (`url`, `mode`, `max_length`).
// NOTE(review): the text advertises "JSON formatting", but `execute` returns
// the body unmodified in json mode (it only appends a warning when the body
// does not parse) — confirm which of the two is intended.
fn description(&self) -> &str {
"Enhanced web fetch tool, supports: HTML-to-text, JSON formatting, image download to base64. \
Parameters: url - web address (required), mode - processing mode (text/json/image, default text), \
max_length - maximum content length (optional, default 50000 chars)"
}
/// Returns the JSON-Schema-style description of the accepted arguments:
/// a required string `url`, an optional string `mode` restricted to
/// `text` / `json` / `image`, and an optional integer `max_length`.
fn parameters(&self) -> Value {
serde_json::json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to fetch content from"
},
"mode": {
"type": "string",
"enum": ["text", "json", "image"],
"description": "Processing mode: text - extract text, image - download image as base64, json - return raw JSON"
},
"max_length": {
"type": "integer",
"description": "Maximum content length to return (characters, default 50000)"
}
},
// Only `url` is mandatory; `execute` supplies defaults for the rest.
"required": ["url"]
})
}
fn execute(&self, parameters: ToolParameters) -> BoxFuture<'_, Result<ToolResult>> {
Box::pin(async move {
let url = parameters
.get("url")
.and_then(|v| v.as_str())
.ok_or_else(|| {
crate::error::ReactError::Tool(ToolError::MissingParameter("url".to_string()))
})?;
if url.trim().is_empty() {
return Ok(ToolResult::error("URL cannot be empty"));
}
if !url.starts_with("http://") && !url.starts_with("https://") {
return Ok(ToolResult::error("URL must start with http:// or https://"));
}
let mode = parameters
.get("mode")
.and_then(|v| v.as_str())
.unwrap_or("text");
let max_length = parameters
.get("max_length")
.and_then(|v| v.as_u64())
.unwrap_or(50_000) as usize;
tracing::info!("WebFetchEnhanced: url='{}', mode='{}'", url, mode);
let client = Client::builder()
.user_agent("Mozilla/5.0 (compatible; EchoAgent/1.0)")
.timeout(Duration::from_secs(30))
.build()
.unwrap_or_else(|e| {
tracing::error!("Failed to build HTTP client: {}, using default", e);
Client::new()
});
let is_image_url = Self::is_image_url(url);
if mode == "image" || is_image_url {
let response = client.head(url).send().await.map_err(|e| {
crate::error::ReactError::Tool(ToolError::ExecutionFailed {
tool: "web_fetch_enhanced".into(),
message: format!("HEAD request failed: {}", e),
})
})?;
if !response.status().is_success() {
return Ok(ToolResult::error(format!(
"HTTP error: {}",
response.status()
)));
}
let content_type = response
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok())
.unwrap_or("application/octet-stream");
if !Self::is_image_content_type(content_type) {
return Ok(ToolResult::error(format!(
"URL is not an image, Content-Type: {}",
content_type
)));
}
let result: std::result::Result<(String, String, usize), ToolError> =
async {
let response = client.get(url).send().await.map_err(|e| {
ToolError::ExecutionFailed {
tool: "web_fetch_enhanced".into(),
message: format!("Failed to download image: {}", e),
}
})?;
if !response.status().is_success() {
return Err(ToolError::ExecutionFailed {
tool: "web_fetch_enhanced".into(),
message: format!("HTTP error: {}", response.status()),
});
}
let content_type = response
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok())
.unwrap_or("image/jpeg")
.to_string();
let mime_subtype = content_type.split('/').nth(1).unwrap_or("png");
let bytes =
response
.bytes()
.await
.map_err(|e| ToolError::ExecutionFailed {
tool: "web_fetch_enhanced".into(),
message: format!("Failed to read image data: {}", e),
})?;
let size = bytes.len();
use base64::Engine;
let base64_data = base64::engine::general_purpose::STANDARD.encode(&bytes);
Ok((
format!("data:image/{};base64,{}", mime_subtype, base64_data),
content_type,
size,
))
}
.await;
let (data_uri, ct, size) = result.map_err(crate::error::ReactError::Tool)?;
let data_uri_display = if data_uri.len() > 1000 {
format!("{}... ({} chars total)", &data_uri[..1000], data_uri.len())
} else {
data_uri.clone()
};
let output = format!(
"URL: {}\nContent-Type: {}\nSize: {} bytes\n\nBase64 Data URI:\n{}",
url, ct, size, data_uri_display
);
return Ok(ToolResult::success(output));
}
let response = client.get(url).send().await.map_err(|e| {
crate::error::ReactError::Tool(ToolError::ExecutionFailed {
tool: "web_fetch_enhanced".into(),
message: format!("Request failed: {}", e),
})
})?;
let status = response.status();
if !status.is_success() {
return Ok(ToolResult::error(format!("HTTP error: {}", status)));
}
let content_type = response
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok())
.unwrap_or("text/html")
.to_string();
let body = response.text().await.map_err(|e| {
crate::error::ReactError::Tool(ToolError::ExecutionFailed {
tool: "web_fetch_enhanced".into(),
message: format!("Failed to read response body: {}", e),
})
})?;
let content = if mode == "json" {
if serde_json::from_str::<Value>(&body).is_err() {
format!("{}\n\n[Warning: content is not valid JSON]", body)
} else {
body
}
} else {
if Self::needs_html_conversion(&content_type) {
self.html_to_text(&body)
} else {
body
}
};
let content = Self::truncate_content(&content, max_length);
let output = format!(
"URL: {}\nStatus: {}\nContent-Type: {}\n\n{}",
url, status, content_type, content
);
Ok(ToolResult::success(output))
})
}
}