use anyhow::Result;
use async_trait::async_trait;
use reqwest::header::USER_AGENT;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use super::{Tool, ToolOutput};
/// Arguments of the `WebFetch` tool, deserialized from the JSON value passed
/// to `execute`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebFetchInput {
/// URL that will be requested with an HTTP GET.
pub url: String,
/// Prompt accompanying the request. It is required by the parameter schema,
/// but nothing in this file reads it after deserialization.
pub prompt: String,
}
/// Result payload of the `WebFetch` tool; `execute` serializes it to
/// pretty-printed JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebFetchOutput {
/// The URL that was requested, copied from the input.
pub url: String,
/// Text extracted from the response body after script/style removal and
/// tag stripping; may end with a truncation notice.
pub text: String,
}
/// Tool that fetches a URL and reduces the response body to plain text.
pub struct WebFetchTool {
/// HTTP client used to issue the GET request in `execute`.
client: reqwest::Client,
}
impl WebFetchTool {
    /// Builds a tool backed by a freshly created `reqwest::Client`.
    ///
    /// NOTE(review): the client is created with `Client::new()`, so no
    /// explicit request timeout is configured here — confirm that is intended.
    pub fn new() -> Self {
        let client = reqwest::Client::new();
        Self { client }
    }
}
impl Default for WebFetchTool {
fn default() -> Self {
Self::new()
}
}
/// Removes every `<tag_name ...> ... </tag_name ...>` element from `html`,
/// including the text between the opening and closing tags.
///
/// Tag matching is ASCII case-insensitive (`<SCRIPT>` and `<script>` are
/// treated alike) and is a plain prefix match on `<tag_name` / `</tag_name`.
///
/// * If an opening tag has no matching closing tag, everything from that
///   opening tag to the end of the input is dropped.
/// * If the closing tag is never terminated by `>`, removal stops right after
///   the `</tag_name` text.
///
/// Returns the remaining markup as a new `String`.
fn remove_tag_content(html: &str, tag_name: &str) -> String {
    let tag_name = tag_name.to_ascii_lowercase();
    let open_tag = format!("<{}", tag_name);
    let close_tag = format!("</{}", tag_name);

    // ASCII-only lowercasing never changes the byte length of the text, so
    // every offset found in `haystack` is also a valid offset into `html`.
    // (Full Unicode lowercasing can grow or shrink characters, which would
    // make the offsets point at the wrong bytes.)
    let haystack = html.to_ascii_lowercase();

    let mut result = String::with_capacity(html.len());
    let mut cursor = 0;

    while let Some(rel_start) = haystack[cursor..].find(&open_tag) {
        let start = cursor + rel_start;
        // Keep everything between the previous element and this one.
        result.push_str(&html[cursor..start]);

        let after_close_name = match haystack[start..].find(&close_tag) {
            Some(rel_close) => start + rel_close + close_tag.len(),
            // Unclosed element: discard the rest of the document.
            None => return result,
        };

        // Skip to just past the `>` that terminates the closing tag, if any.
        cursor = match html[after_close_name..].find('>') {
            Some(gt) => after_close_name + gt + 1,
            None => after_close_name,
        };
    }

    result.push_str(&html[cursor..]);
    result
}
/// Decodes the small set of HTML entities this module understands.
///
/// Recognized entity names (each written in the input as an ampersand, the
/// name, and a semicolon): `amp`, `lt`, `gt`, `quot`, `#x27`, `#39`, `nbsp`.
/// `nbsp` becomes a regular space. Matching is case-sensitive.
///
/// The text is decoded in a single left-to-right pass, so the output of one
/// replacement is never decoded again (an escaped ampersand followed by
/// `lt;` yields a literal ampersand followed by `lt;`, not `<`). Any
/// ampersand that does not start a recognized entity is copied through
/// unchanged.
fn decode_basic_entities(text: &str) -> String {
    // Entity bodies without the leading ampersand, paired with their
    // replacement character.
    const ENTITIES: [(&str, char); 7] = [
        ("amp;", '&'),
        ("lt;", '<'),
        ("gt;", '>'),
        ("quot;", '"'),
        ("#x27;", '\''),
        ("#39;", '\''),
        ("nbsp;", ' '),
    ];

    let mut decoded = String::with_capacity(text.len());
    let mut rest = text;

    while let Some(amp) = rest.find('&') {
        decoded.push_str(&rest[..amp]);
        let after_amp = &rest[amp + 1..];

        let hit = ENTITIES
            .iter()
            .find_map(|&(name, ch)| after_amp.strip_prefix(name).map(|tail| (ch, tail)));

        match hit {
            Some((ch, tail)) => {
                decoded.push(ch);
                rest = tail;
            }
            None => {
                // Not an entity we know: keep the ampersand literally.
                decoded.push('&');
                rest = after_amp;
            }
        }
    }

    decoded.push_str(rest);
    decoded
}

/// Converts an HTML fragment into plain text.
///
/// Steps, in order:
/// 1. Drop everything between `<` and `>`; each `>` is replaced by a single
///    space so adjacent elements do not run together. (A `>` outside a tag is
///    also replaced by a space.)
/// 2. Decode a small set of HTML entities (see `decode_basic_entities`).
///    Decoding happens after tag removal, so escaped angle brackets survive
///    as literal characters in the output.
/// 3. Collapse every run of whitespace into one space and trim both ends.
fn strip_all_tags(html: &str) -> String {
    let mut text = String::with_capacity(html.len());
    let mut in_tag = false;

    for ch in html.chars() {
        match ch {
            '<' => in_tag = true,
            '>' => {
                in_tag = false;
                text.push(' ');
            }
            _ if !in_tag => text.push(ch),
            _ => {}
        }
    }

    decode_basic_entities(&text)
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
}
#[async_trait]
impl Tool for WebFetchTool {
    /// Returns the tool's name, `"WebFetch"`.
    fn name(&self) -> &str {
        "WebFetch"
    }

    /// Returns a one-line, human-readable description of the tool.
    fn description(&self) -> &str {
        "Fetch a URL and convert it into readable text."
    }

    /// Returns the JSON Schema for the tool input: an object with two
    /// required string properties, `url` and `prompt`, and no other keys.
    fn parameters_schema(&self) -> Value {
        serde_json::json!({
            "type": "object",
            "properties": {
                "url": { "type": "string", "format": "uri" },
                "prompt": { "type": "string" }
            },
            "required": ["url", "prompt"],
            "additionalProperties": false
        })
    }

    /// Fetches `input.url` with an HTTP GET and returns its text content.
    ///
    /// The response body is stripped of `<script>` and `<style>` elements and
    /// of all remaining tags, limited to at most 100,000 bytes of text (plus a
    /// truncation notice), and returned as a pretty-printed JSON
    /// [`WebFetchOutput`] wrapped in a successful [`ToolOutput`].
    ///
    /// # Errors
    ///
    /// Returns an error when `input` does not deserialize into
    /// [`WebFetchInput`], when sending the request or reading the body fails,
    /// or when the output cannot be serialized.
    ///
    /// NOTE(review): the HTTP status code is not inspected, so the body of an
    /// error response is converted like any other page. The `prompt` field is
    /// deserialized but not otherwise used here.
    async fn execute(&self, input: Value) -> Result<ToolOutput> {
        // Upper bound, in bytes, on the extracted text before the notice.
        const MAX_TEXT_BYTES: usize = 100_000;

        let fetch_input: WebFetchInput = serde_json::from_value(input)?;

        let res = self
            .client
            .get(&fetch_input.url)
            .header(
                USER_AGENT,
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 \
                 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            )
            .send()
            .await?;
        let html_content = res.text().await?;

        let without_scripts = remove_tag_content(&html_content, "script");
        let without_styles = remove_tag_content(&without_scripts, "style");
        let mut text = strip_all_tags(&without_styles);

        if text.len() > MAX_TEXT_BYTES {
            // `String::truncate` panics if the cut lands inside a multi-byte
            // UTF-8 character, so back up to the nearest character boundary.
            // Offset 0 is always a boundary, so the loop terminates.
            let mut cut = MAX_TEXT_BYTES;
            while !text.is_char_boundary(cut) {
                cut -= 1;
            }
            text.truncate(cut);
            text.push_str("\n\n[Content truncated — exceeded 100,000 characters]");
        }

        let output = WebFetchOutput {
            url: fetch_input.url,
            text,
        };
        let serialized = serde_json::to_string_pretty(&output)?;
        Ok(ToolOutput::success(serialized))
    }
}