Skip to main content

claude_rust_tools/infrastructure/
web_fetch_tool.rs

1use claude_rust_errors::{AppError, AppResult};
2use claude_rust_types::{PermissionLevel, Tool};
3use serde_json::{Value, json};
4
/// Tool that fetches a web page over HTTP and returns its text content.
///
/// The type carries no state; all behaviour lives in its `Tool` implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct WebFetchTool;
6
/// Upper bound, in bytes, on the tag-stripped text returned by `execute`;
/// longer text is cut and a truncation notice is appended.
const MAX_RESPONSE_SIZE: usize = 100_000;
/// Timeout, in seconds, configured on the HTTP client built for each fetch.
const TIMEOUT_SECS: u64 = 30;
9
10#[async_trait::async_trait]
11impl Tool for WebFetchTool {
12    fn name(&self) -> &str {
13        "web_fetch"
14    }
15
16    fn description(&self) -> &str {
17        "Fetch the content of a web page at the given URL. Returns the page text content."
18    }
19
20    fn input_schema(&self) -> Value {
21        json!({
22            "type": "object",
23            "properties": {
24                "url": {
25                    "type": "string",
26                    "description": "The URL to fetch"
27                }
28            },
29            "required": ["url"]
30        })
31    }
32
33    fn permission_level(&self) -> PermissionLevel {
34        PermissionLevel::Dangerous
35    }
36
37    async fn execute(&self, input: Value) -> AppResult<String> {
38        let url = input
39            .get("url")
40            .and_then(|u| u.as_str())
41            .ok_or_else(|| AppError::Tool("missing 'url' field".into()))?;
42
43        let client = reqwest::Client::builder()
44            .timeout(std::time::Duration::from_secs(TIMEOUT_SECS))
45            .build()
46            .map_err(|e| AppError::Tool(format!("failed to create HTTP client: {e}")))?;
47
48        let response = client
49            .get(url)
50            .header("User-Agent", "claude-code-rs/0.2.0")
51            .send()
52            .await
53            .map_err(|e| AppError::Tool(format!("fetch failed: {e}")))?;
54
55        let status = response.status();
56        if !status.is_success() {
57            return Err(AppError::Tool(format!("HTTP {status} for {url}")));
58        }
59
60        let body = response
61            .text()
62            .await
63            .map_err(|e| AppError::Tool(format!("failed to read response body: {e}")))?;
64
65        // Strip HTML tags for readability
66        let text = strip_html_tags(&body);
67
68        // Truncate if too large
69        if text.len() > MAX_RESPONSE_SIZE {
70            Ok(format!(
71                "{}...\n(truncated at {}KB)",
72                &text[..MAX_RESPONSE_SIZE],
73                MAX_RESPONSE_SIZE / 1000
74            ))
75        } else {
76            Ok(text)
77        }
78    }
79}
80
/// Reports whether `text` starts with `prefix`.
///
/// With `ignore_ascii_case` set, ASCII letters compare case-insensitively;
/// all other characters must match exactly.
fn has_prefix(text: &[char], prefix: &str, ignore_ascii_case: bool) -> bool {
    let mut remaining = text.iter();
    prefix.chars().all(|expected| match remaining.next() {
        Some(actual) if ignore_ascii_case => actual.eq_ignore_ascii_case(&expected),
        Some(actual) => *actual == expected,
        None => false,
    })
}

/// Reports whether `text` starts with the opening tag `open` (e.g. `<script`)
/// followed by a tag-name boundary, so that `<styled>` is not taken for `<style>`.
fn opens_raw_text_element(text: &[char], open: &str) -> bool {
    has_prefix(text, open, true)
        && text
            .get(open.chars().count())
            .map_or(true, |&next| next.is_whitespace() || next == '>' || next == '/')
}

/// Returns the index just past the first `>` at or after `start`, or
/// `chars.len()` when there is none.
fn skip_past_tag_end(chars: &[char], start: usize) -> usize {
    chars[start..]
        .iter()
        .position(|&c| c == '>')
        .map_or(chars.len(), |offset| start + offset + 1)
}

/// Basic HTML-to-text conversion.
///
/// * Every `<...>` tag is removed (nothing is inserted in its place).
/// * `<script>` and `<style>` elements are removed together with their content;
///   tag names are matched ASCII case-insensitively.
/// * `&lt;`, `&gt;`, `&amp;` and `&nbsp;` are decoded; other entities are kept verbatim.
/// * Runs of whitespace (including newlines) collapse to a single space and the
///   result is trimmed, so the returned text is a single line.
///
/// This is not an HTML parser: a `>` inside an attribute value ends the tag
/// early, and an unterminated tag or raw-text element discards the rest of the
/// input.
fn strip_html_tags(html: &str) -> String {
    // Elements whose content is code rather than page text, as
    // (opening prefix, closing prefix) pairs.
    const RAW_TEXT_ELEMENTS: [(&str, &str); 2] =
        [("<script", "</script"), ("<style", "</style")];
    // Entities that get decoded, with their replacement character.
    const ENTITIES: [(&str, char); 4] =
        [("&lt;", '<'), ("&gt;", '>'), ("&amp;", '&'), ("&nbsp;", ' ')];

    // All scanning is done on char indices; mixing them with byte offsets into
    // the `str` would panic or mis-compare on non-ASCII pages.
    let chars: Vec<char> = html.chars().collect();
    let len = chars.len();
    let mut result = String::with_capacity(html.len());
    let mut last_was_whitespace = false;
    let mut i = 0;

    while i < len {
        let rest = &chars[i..];
        let c = chars[i];

        if c == '<' {
            let raw_close = RAW_TEXT_ELEMENTS
                .iter()
                .find(|(open, _)| opens_raw_text_element(rest, open))
                .map(|&(_, close)| close);

            // Drop the tag itself.
            i = skip_past_tag_end(&chars, i);

            // For script/style, also drop everything up to and including the
            // matching closing tag. Only the closing tag is searched for, so a
            // `<` inside the script or stylesheet cannot hide it.
            if let Some(close) = raw_close {
                while i < len && !has_prefix(&chars[i..], close, true) {
                    i += 1;
                }
                i = skip_past_tag_end(&chars, i);
            }
            continue;
        }

        if c == '&' {
            let entity = ENTITIES
                .iter()
                .find(|(name, _)| has_prefix(rest, name, false));
            if let Some(&(name, decoded)) = entity {
                // A decoded `&nbsp;` is always emitted, even after other whitespace.
                result.push(decoded);
                last_was_whitespace = decoded == ' ';
                i += name.chars().count();
                continue;
            }
        }

        if c.is_whitespace() {
            if !last_was_whitespace {
                result.push(' ');
                last_was_whitespace = true;
            }
        } else {
            result.push(c);
            last_was_whitespace = false;
        }
        i += 1;
    }

    result.trim().to_string()
}