codetether_agent/tool/
webfetch.rs1use anyhow::{Context, Result};
4use async_trait::async_trait;
5use serde::Deserialize;
6use serde_json::{json, Value};
7use std::time::Duration;
8use super::{Tool, ToolResult};
9
/// Upper bound, in bytes, for fetched content (10 MiB).
// Kept even when nothing references it, hence the lint opt-out.
#[allow(dead_code)]
const MAX_CONTENT_LENGTH: usize = 10 * 1024 * 1024;

/// Total time allowed for a single HTTP request.
const REQUEST_TIMEOUT: Duration = Duration::from_secs(30);
13
14pub struct WebFetchTool {
15 client: reqwest::Client,
16}
17
18impl Default for WebFetchTool {
19 fn default() -> Self { Self::new() }
20}
21
22impl WebFetchTool {
23 pub fn new() -> Self {
24 let client = reqwest::Client::builder()
25 .timeout(REQUEST_TIMEOUT)
26 .user_agent("CodeTether-Agent/1.0")
27 .redirect(reqwest::redirect::Policy::limited(5))
28 .build()
29 .expect("Failed to build HTTP client");
30 Self { client }
31 }
32
33 fn html_to_markdown(&self, html: &str) -> String {
34 let mut result = html.to_string();
35 let patterns = [
36 (r"<h1[^>]*>(.*?)</h1>", "# $1\n"),
37 (r"<h2[^>]*>(.*?)</h2>", "## $1\n"),
38 (r"<h3[^>]*>(.*?)</h3>", "### $1\n"),
39 (r"<p[^>]*>(.*?)</p>", "$1\n\n"),
40 (r"<(strong|b)[^>]*>(.*?)</\1>", "**$2**"),
41 (r"<(em|i)[^>]*>(.*?)</\1>", "*$2*"),
42 (r"<code[^>]*>(.*?)</code>", "`$1`"),
43 (r"<li[^>]*>(.*?)</li>", "- $1\n"),
44 ];
45 for (pat, rep) in patterns {
46 if let Ok(re) = regex::Regex::new(pat) {
47 result = re.replace_all(&result, rep).to_string();
48 }
49 }
50 result = regex::Regex::new(r"<[^>]+>").unwrap().replace_all(&result, "").to_string();
51 result.replace(" ", " ").replace("&", "&").replace("<", "<").replace(">", ">")
52 }
53
54 fn html_to_text(&self, html: &str) -> String {
55 let md = self.html_to_markdown(html);
56 md.replace("**", "").replace("*", "").replace("`", "").replace("# ", "")
57 }
58
59 fn is_html(&self, ct: &str, body: &str) -> bool {
60 ct.contains("text/html") || body.trim().starts_with('<')
61 }
62}
63
64#[derive(Deserialize)]
65struct Params { url: String, #[serde(default = "default_fmt")] format: String }
/// Serde default for `Params::format`: the string `"markdown"`.
fn default_fmt() -> String {
    String::from("markdown")
}
67
68#[async_trait]
69impl Tool for WebFetchTool {
70 fn id(&self) -> &str { "webfetch" }
71 fn name(&self) -> &str { "Web Fetch" }
72 fn description(&self) -> &str { "Fetch content from URL, convert HTML to markdown/text/html." }
73 fn parameters(&self) -> Value {
74 json!({"type":"object","properties":{"url":{"type":"string"},"format":{"type":"string","enum":["markdown","text","html"],"default":"markdown"}},"required":["url"]})
75 }
76
77 async fn execute(&self, params: Value) -> Result<ToolResult> {
78 let p: Params = serde_json::from_value(params).context("Invalid params")?;
79 let url = p.url.parse::<reqwest::Url>().context("Invalid URL")?;
80 if url.scheme() != "http" && url.scheme() != "https" {
81 return Ok(ToolResult::error("Only HTTP/HTTPS supported"));
82 }
83 let resp = self.client.get(url).send().await.map_err(|e| anyhow::anyhow!("{}", e))?;
84 if !resp.status().is_success() {
85 return Ok(ToolResult::error(format!("HTTP {}", resp.status())));
86 }
87 let ct = resp.headers().get("content-type").and_then(|v| v.to_str().ok()).unwrap_or("").to_lowercase();
88 let body = resp.text().await.context("Failed to read body")?;
89 let content = match p.format.as_str() {
90 "html" => body,
91 "text" => if self.is_html(&ct, &body) { self.html_to_text(&body) } else { body },
92 _ => if self.is_html(&ct, &body) { self.html_to_markdown(&body) } else { body },
93 };
94 Ok(ToolResult::success(content).with_metadata("url", json!(p.url)))
95 }
96}