claude_rust_tools/infrastructure/
web_fetch_tool.rs1use claude_rust_errors::{AppError, AppResult};
2use claude_rust_types::{PermissionLevel, Tool};
3use serde_json::{Value, json};
4
/// Tool that fetches a web page over HTTP and returns its text content with
/// the HTML markup stripped.
///
/// The type carries no state, so it is cheap to copy and can be created with
/// `WebFetchTool` or `WebFetchTool::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct WebFetchTool;

/// Upper bound, in bytes, on the extracted text returned before it is truncated.
const MAX_RESPONSE_SIZE: usize = 100_000;
/// Timeout, in seconds, configured on the HTTP client used for each fetch.
const TIMEOUT_SECS: u64 = 30;
9
10#[async_trait::async_trait]
11impl Tool for WebFetchTool {
12 fn name(&self) -> &str {
13 "web_fetch"
14 }
15
16 fn description(&self) -> &str {
17 "Fetch the content of a web page at the given URL. Returns the page text content."
18 }
19
20 fn input_schema(&self) -> Value {
21 json!({
22 "type": "object",
23 "properties": {
24 "url": {
25 "type": "string",
26 "description": "The URL to fetch"
27 }
28 },
29 "required": ["url"]
30 })
31 }
32
33 fn permission_level(&self) -> PermissionLevel {
34 PermissionLevel::Dangerous
35 }
36
37 async fn execute(&self, input: Value) -> AppResult<String> {
38 let url = input
39 .get("url")
40 .and_then(|u| u.as_str())
41 .ok_or_else(|| AppError::Tool("missing 'url' field".into()))?;
42
43 let client = reqwest::Client::builder()
44 .timeout(std::time::Duration::from_secs(TIMEOUT_SECS))
45 .build()
46 .map_err(|e| AppError::Tool(format!("failed to create HTTP client: {e}")))?;
47
48 let response = client
49 .get(url)
50 .header("User-Agent", "claude-code-rs/0.2.0")
51 .send()
52 .await
53 .map_err(|e| AppError::Tool(format!("fetch failed: {e}")))?;
54
55 let status = response.status();
56 if !status.is_success() {
57 return Err(AppError::Tool(format!("HTTP {status} for {url}")));
58 }
59
60 let body = response
61 .text()
62 .await
63 .map_err(|e| AppError::Tool(format!("failed to read response body: {e}")))?;
64
65 let text = strip_html_tags(&body);
67
68 if text.len() > MAX_RESPONSE_SIZE {
70 Ok(format!(
71 "{}...\n(truncated at {}KB)",
72 &text[..MAX_RESPONSE_SIZE],
73 MAX_RESPONSE_SIZE / 1000
74 ))
75 } else {
76 Ok(text)
77 }
78 }
79}
80
/// Elements whose entire content is discarded rather than treated as page text.
const RAW_TEXT_ELEMENTS: [&str; 2] = ["script", "style"];

/// Character entities that are decoded, paired with their replacement character.
const ENTITIES: [(&str, char); 4] = [
    ("&lt;", '<'),
    ("&gt;", '>'),
    ("&amp;", '&'),
    ("&nbsp;", ' '),
];

/// Extracts the readable text from an HTML document.
///
/// * Tags (`<...>`) are removed.
/// * `<script>` and `<style>` elements are removed together with their content.
/// * The entities `&lt;`, `&gt;`, `&amp;` and `&nbsp;` are decoded; any other
///   entity is kept verbatim.
/// * Every run of whitespace (including newlines) collapses to a single space,
///   and the result is trimmed.
///
/// Markup that is never closed (a `<` with no following `>`, or a script/style
/// element without its closing tag) discards the remainder of the input.
///
/// The scan works on string slices, so multi-byte UTF-8 input is handled
/// without slicing inside a character.
fn strip_html_tags(html: &str) -> String {
    let mut text = String::with_capacity(html.len());
    let mut last_was_whitespace = false;
    let mut rest = html;

    while let Some(first) = rest.chars().next() {
        if first == '<' {
            rest = skip_markup(rest);
            continue;
        }

        // Only a leading '&' can start an entity; everything else is copied as is.
        let decoded = if first == '&' { decode_entity(rest) } else { None };
        let (ch, consumed) = decoded.unwrap_or((first, first.len_utf8()));

        if !ch.is_whitespace() {
            text.push(ch);
            last_was_whitespace = false;
        } else if !last_was_whitespace {
            text.push(' ');
            last_was_whitespace = true;
        }
        rest = &rest[consumed..];
    }

    text.trim().to_string()
}

/// Given text that starts with `<`, returns the text following that piece of
/// markup: after the tag's `>`, or after the closing tag for script/style.
fn skip_markup(markup: &str) -> &str {
    let after_tag = match markup.find('>') {
        Some(end) => &markup[end + 1..],
        None => return "",
    };

    let raw_element = RAW_TEXT_ELEMENTS
        .iter()
        .copied()
        .find(|name| opens_element(markup, name));

    match raw_element {
        Some(name) => skip_past_closing_tag(after_tag, name),
        None => after_tag,
    }
}

/// Reports whether `markup` (which starts with `<`) is a start tag for element
/// `name`, compared ASCII case-insensitively and ending at a tag-name boundary.
fn opens_element(markup: &str, name: &str) -> bool {
    let bytes = markup.as_bytes();
    let name_end = 1 + name.len();
    let name_matches = bytes
        .get(1..name_end)
        .is_some_and(|head| head.eq_ignore_ascii_case(name.as_bytes()));

    name_matches
        && match bytes.get(name_end) {
            Some(&next) => next.is_ascii_whitespace() || matches!(next, b'>' | b'/'),
            None => true,
        }
}

/// Returns the text after the first `</name ...>` closing tag in `text`, or an
/// empty string when no complete closing tag is found.
fn skip_past_closing_tag<'a>(text: &'a str, name: &str) -> &'a str {
    // Only "</name" ends the element, so a bare '<' or '>' inside script or
    // style content is ignored instead of being mistaken for a tag.
    let closing_at = text.match_indices("</").map(|(pos, _)| pos).find(|&pos| {
        text.as_bytes()[pos + 2..]
            .get(..name.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(name.as_bytes()))
    });

    let resume_at = closing_at.and_then(|pos| text[pos..].find('>').map(|end| pos + end + 1));
    match resume_at {
        Some(index) => &text[index..],
        None => "",
    }
}

/// If `text` starts with a supported entity, returns its replacement character
/// and the number of bytes the entity occupies.
fn decode_entity(text: &str) -> Option<(char, usize)> {
    ENTITIES
        .iter()
        .find(|(entity, _)| text.starts_with(*entity))
        .map(|&(entity, ch)| (ch, entity.len()))
}