Skip to main content

stynx_code_tools/infrastructure/
web_fetch_tool.rs

1use stynx_code_errors::{AppError, AppResult};
2use stynx_code_types::{PermissionLevel, Tool};
3use serde_json::{Value, json};
4
/// Stateless tool that fetches a web page over HTTP and returns its text content.
///
/// The type carries no data, so it is cheap to copy and can be created with
/// either the unit literal `WebFetchTool` or `WebFetchTool::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct WebFetchTool;
6
/// Upper bound, in bytes, on the extracted page text returned by the tool;
/// longer text is cut off and a truncation notice is appended.
const MAX_RESPONSE_SIZE: usize = 100 * 1_000;
/// Timeout, in seconds, configured on the HTTP client used for a fetch.
const TIMEOUT_SECS: u64 = 30;
9
10#[async_trait::async_trait]
11impl Tool for WebFetchTool {
12    fn name(&self) -> &str {
13        "web_fetch"
14    }
15
16    fn description(&self) -> &str {
17        "Fetch the content of a web page at the given URL. Returns the page text content."
18    }
19
20    fn input_schema(&self) -> Value {
21        json!({
22            "type": "object",
23            "properties": {
24                "url": {
25                    "type": "string",
26                    "description": "The URL to fetch"
27                }
28            },
29            "required": ["url"]
30        })
31    }
32
33    fn permission_level(&self) -> PermissionLevel {
34        PermissionLevel::Dangerous
35    }
36
37    fn is_read_only(&self, _input: &Value) -> bool { true }
38    fn is_concurrent_safe(&self, _input: &Value) -> bool { true }
39    fn is_open_world(&self, _input: &Value) -> bool { true }
40
41    async fn execute(&self, input: Value) -> AppResult<String> {
42        let url = input
43            .get("url")
44            .and_then(|u| u.as_str())
45            .ok_or_else(|| AppError::Tool("missing 'url' field".into()))?;
46
47        let client = reqwest::Client::builder()
48            .timeout(std::time::Duration::from_secs(TIMEOUT_SECS))
49            .build()
50            .map_err(|e| AppError::Tool(format!("failed to create HTTP client: {e}")))?;
51
52        let response = client
53            .get(url)
54            .header("User-Agent", "claude-code-rs/0.2.0")
55            .send()
56            .await
57            .map_err(|e| AppError::Tool(format!("fetch failed: {e}")))?;
58
59        let status = response.status();
60        if !status.is_success() {
61            return Err(AppError::Tool(format!("HTTP {status} for {url}")));
62        }
63
64        let body = response
65            .text()
66            .await
67            .map_err(|e| AppError::Tool(format!("failed to read response body: {e}")))?;
68
69        let text = strip_html_tags(&body);
70
71        if text.len() > MAX_RESPONSE_SIZE {
72            Ok(format!(
73                "{}...\n(truncated at {}KB)",
74                &text[..MAX_RESPONSE_SIZE],
75                MAX_RESPONSE_SIZE / 1000
76            ))
77        } else {
78            Ok(text)
79        }
80    }
81}
82
/// Converts an HTML document into plain text.
///
/// * Every tag (`<...>`) is removed; an unterminated tag swallows the rest of
///   the input.
/// * `<script>` and `<style>` elements are removed together with their
///   content. Tag names are matched ASCII case-insensitively, and a missing
///   closing tag drops the remainder of the input.
/// * The entities `&lt;`, `&gt;`, `&amp;` and `&nbsp;` are decoded (the last
///   one to a plain space); all other entities are left untouched.
/// * Each run of whitespace, line breaks included, becomes a single space and
///   the result is trimmed, so the returned text is a single line.
///
/// The scan walks the input by `char`, so non-ASCII text is handled without
/// slicing inside a multi-byte character.
fn strip_html_tags(html: &str) -> String {
    const ENTITIES: [(&str, char); 4] = [
        ("&lt;", '<'),
        ("&gt;", '>'),
        ("&amp;", '&'),
        ("&nbsp;", ' '),
    ];

    let mut result = String::with_capacity(html.len());
    let mut last_was_whitespace = false;
    // Unprocessed tail of the input; it always starts on a char boundary.
    let mut rest = html;

    while let Some(c) = rest.chars().next() {
        if c == '<' {
            rest = match raw_text_element(rest) {
                Some(name) => skip_raw_text_element(rest, name),
                None => skip_past_tag_end(rest),
            };
            continue;
        }

        if c == '&' {
            let decoded = ENTITIES
                .iter()
                .find_map(move |&(entity, ch)| rest.strip_prefix(entity).map(|tail| (ch, tail)));
            if let Some((ch, tail)) = decoded {
                // `&nbsp;` always contributes a space, even right after
                // another one; only literal whitespace runs are collapsed.
                result.push(ch);
                last_was_whitespace = ch == ' ';
                rest = tail;
                continue;
            }
        }

        if !c.is_whitespace() {
            result.push(c);
            last_was_whitespace = false;
        } else if !last_was_whitespace {
            result.push(' ');
            last_was_whitespace = true;
        }
        rest = &rest[c.len_utf8()..];
    }

    result.trim().to_string()
}

/// Returns `true` when `s` starts with the tag name `name` (compared ASCII
/// case-insensitively) and the name ends there, so `script` matches
/// `script>` and `SCRIPT src=..` but not `scripted`.
fn has_tag_name(s: &str, name: &str) -> bool {
    let bytes = s.as_bytes();
    let name_matches = matches!(
        bytes.get(..name.len()),
        Some(head) if head.eq_ignore_ascii_case(name.as_bytes())
    );
    let name_continues = matches!(
        bytes.get(name.len()),
        Some(b) if b.is_ascii_alphanumeric() || *b == b'-'
    );
    name_matches && !name_continues
}

/// If `tag` begins with the opening tag of an element whose content must be
/// discarded (`<script` or `<style`), returns that element's name.
fn raw_text_element(tag: &str) -> Option<&'static str> {
    const RAW_TEXT_ELEMENTS: [&str; 2] = ["script", "style"];

    let after_lt = tag.strip_prefix('<')?;
    RAW_TEXT_ELEMENTS
        .iter()
        .copied()
        .find(|name| has_tag_name(after_lt, name))
}

/// Returns what follows the first `>` in `s`, or `""` when there is none.
fn skip_past_tag_end(s: &str) -> &str {
    s.find('>').map_or("", |end| &s[end + 1..])
}

/// Given `s` positioned at the opening tag of the raw-text element `name`,
/// returns what follows its closing tag, or `""` when it is never closed.
fn skip_raw_text_element<'a>(s: &'a str, name: &str) -> &'a str {
    let mut body = skip_past_tag_end(s);
    // Only `</name` ends the element; any other `<` (for example a
    // comparison operator in a script) is part of the discarded content.
    while let Some(lt) = body.find('<') {
        let candidate = &body[lt..];
        if let Some(after_slash) = candidate.strip_prefix("</") {
            if has_tag_name(after_slash, name) {
                return skip_past_tag_end(candidate);
            }
        }
        body = &body[lt + 1..];
    }
    ""
}