Skip to main content

hh_cli/tool/
web.rs

1use crate::tool::{Tool, ToolResult, ToolSchema};
2use async_trait::async_trait;
3use reqwest::StatusCode;
4use scraper::{Html, Selector};
5use serde::Serialize;
6use serde_json::{Value, json};
7
8pub struct WebFetchTool {
9    client: reqwest::Client,
10}
11
12pub struct WebSearchTool {
13    client: reqwest::Client,
14}
15
16#[derive(Debug, Serialize)]
17struct WebFetchOutput {
18    url: String,
19    status_code: u16,
20    ok: bool,
21    body: String,
22}
23
24#[derive(Debug, Serialize)]
25struct SearchResult {
26    title: String,
27    url: String,
28    snippet: String,
29}
30
31#[derive(Debug, Serialize)]
32struct WebSearchOutput {
33    query: String,
34    count: usize,
35    results: Vec<SearchResult>,
36}
37
38enum WebRequestError {
39    Request(reqwest::Error),
40    ReadBody(reqwest::Error),
41}
42
43async fn send_and_read_text(
44    request: reqwest::RequestBuilder,
45) -> Result<(StatusCode, String), WebRequestError> {
46    let response = request.send().await.map_err(WebRequestError::Request)?;
47    let status = response.status();
48    let body = response.text().await.map_err(WebRequestError::ReadBody)?;
49    Ok((status, body))
50}
51
52impl Default for WebFetchTool {
53    fn default() -> Self {
54        Self::new()
55    }
56}
57
58impl WebFetchTool {
59    pub fn new() -> Self {
60        Self {
61            client: reqwest::Client::new(),
62        }
63    }
64}
65
66#[async_trait]
67impl Tool for WebFetchTool {
68    fn schema(&self) -> ToolSchema {
69        ToolSchema {
70            name: "web_fetch".to_string(),
71            description: "Fetch content from a URL".to_string(),
72            capability: Some("web".to_string()),
73            mutating: Some(false),
74            parameters: json!({
75                "type": "object",
76                "properties": {
77                    "url": {"type": "string"}
78                },
79                "required": ["url"]
80            }),
81        }
82    }
83
84    async fn execute(&self, args: Value) -> ToolResult {
85        let url = args.get("url").and_then(|v| v.as_str()).unwrap_or_default();
86        let (status, body) = match send_and_read_text(self.client.get(url)).await {
87            Ok(result) => result,
88            Err(WebRequestError::Request(err)) => {
89                return ToolResult::err_text("request_error", err.to_string());
90            }
91            Err(WebRequestError::ReadBody(err)) => {
92                return ToolResult::err_text("read_body_error", err.to_string());
93            }
94        };
95
96        let output = WebFetchOutput {
97            url: url.to_string(),
98            status_code: status.as_u16(),
99            ok: status.is_success(),
100            body,
101        };
102
103        if status.is_success() {
104            ToolResult::ok_json_serializable("ok", &output)
105        } else {
106            let payload = serde_json::to_value(&output)
107                .unwrap_or_else(|_| json!({"status_code": status.as_u16()}));
108            ToolResult::err_json("request_failed", payload)
109        }
110    }
111}
112
113impl Default for WebSearchTool {
114    fn default() -> Self {
115        Self::new()
116    }
117}
118
119impl WebSearchTool {
120    pub fn new() -> Self {
121        Self {
122            client: reqwest::Client::builder()
123                .user_agent("Mozilla/5.0 (compatible; hh-agent/1.0)")
124                .build()
125                .unwrap_or_else(|_| reqwest::Client::new()),
126        }
127    }
128}
129
130#[async_trait]
131impl Tool for WebSearchTool {
132    fn schema(&self) -> ToolSchema {
133        ToolSchema {
134            name: "web_search".to_string(),
135            description: "Search the web for information. Returns search results with titles, snippets, and URLs.".to_string(),
136            capability: Some("web".to_string()),
137            mutating: Some(false),
138            parameters: json!({
139                "type": "object",
140                "properties": {
141                    "query": {
142                        "type": "string",
143                        "description": "The search query"
144                    }
145                },
146                "required": ["query"]
147            }),
148        }
149    }
150
151    async fn execute(&self, args: Value) -> ToolResult {
152        let query = args
153            .get("query")
154            .and_then(|v| v.as_str())
155            .unwrap_or_default();
156
157        if query.is_empty() {
158            return ToolResult::err_text("invalid_input", "query is required");
159        }
160
161        let url = format!(
162            "https://html.duckduckgo.com/html/?q={}",
163            urlencoding::encode(query)
164        );
165
166        let (status, html) = match send_and_read_text(self.client.get(&url)).await {
167            Ok(result) => result,
168            Err(WebRequestError::Request(err)) => {
169                return ToolResult::err_text(
170                    "request_error",
171                    format!("search request failed: {}", err),
172                );
173            }
174            Err(WebRequestError::ReadBody(err)) => {
175                return ToolResult::err_text(
176                    "read_body_error",
177                    format!("failed to read response: {}", err),
178                );
179            }
180        };
181
182        if !status.is_success() {
183            return ToolResult::err_text(
184                "search_failed",
185                format!("search failed: status={status}"),
186            );
187        }
188
189        let results = parse_ddg_results(&html);
190        let output = WebSearchOutput {
191            query: query.to_string(),
192            count: results.len(),
193            results,
194        };
195        ToolResult::ok_json_serializable("ok", &output)
196    }
197}
198
199fn parse_ddg_results(html: &str) -> Vec<SearchResult> {
200    let document = Html::parse_document(html);
201    let result_selector = match Selector::parse(".result") {
202        Ok(s) => s,
203        Err(_) => return Vec::new(),
204    };
205    let title_selector = match Selector::parse(".result__a") {
206        Ok(s) => s,
207        Err(_) => return Vec::new(),
208    };
209    let snippet_selector = match Selector::parse(".result__snippet") {
210        Ok(s) => s,
211        Err(_) => return Vec::new(),
212    };
213
214    let mut results = Vec::new();
215
216    for result in document.select(&result_selector) {
217        let title_el = result.select(&title_selector).next();
218        let title = title_el
219            .map(|el| el.text().collect::<String>())
220            .unwrap_or_default()
221            .trim()
222            .to_string();
223
224        let url = title_el
225            .and_then(|el| el.value().attr("href"))
226            .and_then(extract_ddg_url)
227            .unwrap_or_default();
228
229        let snippet = result
230            .select(&snippet_selector)
231            .next()
232            .map(|el| el.text().collect::<String>())
233            .unwrap_or_default()
234            .trim()
235            .to_string();
236
237        if !title.is_empty() {
238            results.push(SearchResult {
239                title,
240                url,
241                snippet,
242            });
243        }
244
245        if results.len() >= 5 {
246            break;
247        }
248    }
249
250    results
251}
252
253fn extract_ddg_url(redirect_url: &str) -> Option<String> {
254    // DuckDuckGo redirect URLs are like: /l/?uddg=URL&rut=...
255    let prefix = "/l/?uddg=";
256    if let Some(start) = redirect_url.find(prefix) {
257        let encoded = &redirect_url[start + prefix.len()..];
258        let encoded = if let Some(end) = encoded.find('&') {
259            &encoded[..end]
260        } else {
261            encoded
262        };
263        // URL decode the result
264        return Some(urlencoding_decode(encoded));
265    }
266    None
267}
268
/// Decodes a percent-encoded query-string component.
///
/// `+` becomes a space and each `%XX` escape (two hex digits, either case)
/// becomes the byte it encodes. Decoding works on bytes and the result is
/// interpreted as UTF-8, so multi-byte escapes such as `%C3%A9` yield the
/// intended character; byte sequences that are not valid UTF-8 are replaced
/// with U+FFFD. A `%` that is not followed by two hex digits is kept
/// literally and the characters after it are processed normally.
fn urlencoding_decode(s: &str) -> String {
    /// Value of a single ASCII hex digit, or `None` for any other byte.
    fn hex_value(digit: u8) -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            _ => None,
        }
    }

    let mut decoded = Vec::with_capacity(s.len());
    let mut rest = s.as_bytes();

    while let Some((&byte, tail)) = rest.split_first() {
        rest = tail;
        match byte {
            b'+' => decoded.push(b' '),
            b'%' => {
                let escape = match tail {
                    [hi, lo, ..] => hex_value(*hi).zip(hex_value(*lo)),
                    _ => None,
                };
                match escape {
                    Some((hi, lo)) => {
                        decoded.push((hi << 4) | lo);
                        // Skip the two hex digits that were just consumed.
                        rest = &tail[2..];
                    }
                    None => decoded.push(b'%'),
                }
            }
            other => decoded.push(other),
        }
    }

    String::from_utf8_lossy(&decoded).into_owned()
}