Skip to main content

claude_rust_tools/infrastructure/
web_fetch_tool.rs

1use claude_rust_errors::{AppError, AppResult};
2use claude_rust_types::{PermissionLevel, Tool};
3use serde_json::{Value, json};
4
/// Tool that downloads a web page and returns its text content.
///
/// The type carries no state; all behavior lives in its `Tool` implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct WebFetchTool;

/// Maximum number of bytes of extracted text returned before the output is truncated.
const MAX_RESPONSE_SIZE: usize = 100_000;
/// Timeout, in seconds, configured on the HTTP client used for each fetch.
const TIMEOUT_SECS: u64 = 30;
9
10#[async_trait::async_trait]
11impl Tool for WebFetchTool {
12    fn name(&self) -> &str {
13        "web_fetch"
14    }
15
16    fn description(&self) -> &str {
17        "Fetch the content of a web page at the given URL. Returns the page text content."
18    }
19
20    fn input_schema(&self) -> Value {
21        json!({
22            "type": "object",
23            "properties": {
24                "url": {
25                    "type": "string",
26                    "description": "The URL to fetch"
27                }
28            },
29            "required": ["url"]
30        })
31    }
32
33    fn permission_level(&self) -> PermissionLevel {
34        PermissionLevel::Dangerous
35    }
36
37    fn is_read_only(&self, _input: &Value) -> bool { true }
38    fn is_concurrent_safe(&self, _input: &Value) -> bool { true }
39    fn is_open_world(&self, _input: &Value) -> bool { true }
40
41    async fn execute(&self, input: Value) -> AppResult<String> {
42        let url = input
43            .get("url")
44            .and_then(|u| u.as_str())
45            .ok_or_else(|| AppError::Tool("missing 'url' field".into()))?;
46
47        let client = reqwest::Client::builder()
48            .timeout(std::time::Duration::from_secs(TIMEOUT_SECS))
49            .build()
50            .map_err(|e| AppError::Tool(format!("failed to create HTTP client: {e}")))?;
51
52        let response = client
53            .get(url)
54            .header("User-Agent", "claude-code-rs/0.2.0")
55            .send()
56            .await
57            .map_err(|e| AppError::Tool(format!("fetch failed: {e}")))?;
58
59        let status = response.status();
60        if !status.is_success() {
61            return Err(AppError::Tool(format!("HTTP {status} for {url}")));
62        }
63
64        let body = response
65            .text()
66            .await
67            .map_err(|e| AppError::Tool(format!("failed to read response body: {e}")))?;
68
69        // Strip HTML tags for readability
70        let text = strip_html_tags(&body);
71
72        // Truncate if too large
73        if text.len() > MAX_RESPONSE_SIZE {
74            Ok(format!(
75                "{}...\n(truncated at {}KB)",
76                &text[..MAX_RESPONSE_SIZE],
77                MAX_RESPONSE_SIZE / 1000
78            ))
79        } else {
80            Ok(text)
81        }
82    }
83}
84
/// Converts HTML to plain text with a small hand-written scanner (this is not
/// a full HTML parser).
///
/// * Every `<...>` tag is removed; nothing is inserted in its place.
/// * `<script>` and `<style>` elements are removed together with their
///   contents. Tag names are matched ASCII-case-insensitively.
/// * `&lt;`, `&gt;`, `&amp;` and `&nbsp;` are decoded; any other entity is
///   left exactly as written.
/// * Each run of whitespace (line breaks included) collapses to one space and
///   the final result is trimmed.
///
/// A tag without a closing `>`, or a `<script>`/`<style>` element without a
/// closing tag, discards the remainder of the input.
fn strip_html_tags(html: &str) -> String {
    // Elements whose content is dropped: (opening-tag prefix, closing-tag prefix).
    const RAW_TEXT_ELEMENTS: [(&str, &str); 2] =
        [("<script", "</script"), ("<style", "</style")];
    // Entities that are decoded: (source text, replacement character).
    const ENTITIES: [(&str, char); 4] =
        [("&lt;", '<'), ("&gt;", '>'), ("&amp;", '&'), ("&nbsp;", ' ')];

    let mut result = String::with_capacity(html.len());
    let mut last_was_whitespace = false;
    // Input not yet consumed. Advancing a `&str` keeps every offset a byte
    // offset on a character boundary, so non-ASCII pages cannot cause a
    // mismatched comparison or a slicing panic.
    let mut rest = html;

    while let Some(c) = rest.chars().next() {
        if c == '<' {
            let raw_text = RAW_TEXT_ELEMENTS
                .iter()
                .find(|(open, _)| opens_element(rest, open));
            rest = match raw_text {
                Some((_, close)) => after_raw_text_element(rest, close),
                None => after_next_gt(rest),
            };
            continue;
        }

        if c == '&' {
            let known = ENTITIES.iter().find(|(entity, _)| rest.starts_with(*entity));
            if let Some(&(entity, decoded)) = known {
                result.push(decoded);
                // A decoded `&nbsp;` counts as whitespace for the collapsing below.
                last_was_whitespace = decoded == ' ';
                rest = &rest[entity.len()..];
                continue;
            }
        }

        if c.is_whitespace() {
            if !last_was_whitespace {
                result.push(' ');
                last_was_whitespace = true;
            }
        } else {
            result.push(c);
            last_was_whitespace = false;
        }
        rest = &rest[c.len_utf8()..];
    }

    // All line breaks were collapsed to spaces above, so trimming is the only
    // clean-up left to do.
    result.trim().to_string()
}

/// Returns `true` when `input` begins with the opening-tag prefix `open`
/// (ASCII-case-insensitive) and the tag name ends right there, so that
/// `<style` matches `<style>` and `<STYLE type=...>` but not `<styles>`.
fn opens_element(input: &str, open: &str) -> bool {
    let bytes = input.as_bytes();
    let Some(head) = bytes.get(..open.len()) else {
        return false;
    };
    let name_ends = bytes
        .get(open.len())
        .copied()
        .map_or(true, |b| b.is_ascii_whitespace() || b == b'/' || b == b'>');
    head.eq_ignore_ascii_case(open.as_bytes()) && name_ends
}

/// Returns the part of `input` after its first `>`, or `""` when there is none.
fn after_next_gt(input: &str) -> &str {
    input.find('>').map_or("", |end| &input[end + 1..])
}

/// Skips a whole raw-text element: `input` starts at its opening tag and
/// `close` is the closing-tag prefix (for example `</script`). Returns what
/// follows the closing tag, or `""` when the element is never closed.
fn after_raw_text_element<'a>(input: &'a str, close: &str) -> &'a str {
    let body = after_next_gt(input);
    // Only the closing tag ends the element; a stray `<` or `>` inside the
    // script or style text must not be read as markup.
    let close_at = body
        .as_bytes()
        .windows(close.len())
        .position(|window| window.eq_ignore_ascii_case(close.as_bytes()));
    match close_at {
        // `close` is pure ASCII, so the matched bytes are ASCII too and both
        // ends of the match are character boundaries.
        Some(at) => after_next_gt(&body[at + close.len()..]),
        None => "",
    }
}