1use anyhow::Result;
2use async_trait::async_trait;
3use serde::Deserialize;
4use serde_json::json;
5use tokio::process::Command;
6
7use super::{ApprovalRequirement, Tool, ToolContext, ToolDef, ToolResult};
8
/// Returns the smallest UTF-8 char boundary of `s` that is `>= index`.
///
/// Any `index` at or beyond the end of the string yields `s.len()`, so the
/// result is always safe to use as a slice bound.
fn ceil_char_boundary(s: &str, index: usize) -> usize {
    // An empty range (index >= len) falls straight through to `s.len()`.
    (index..s.len())
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(s.len())
}
21
/// Returns the largest UTF-8 char boundary of `s` that is `<= index`.
///
/// Any `index` at or beyond the end of the string yields `s.len()`.
fn floor_char_boundary(s: &str, index: usize) -> usize {
    if index >= s.len() {
        s.len()
    } else {
        // Offset 0 is always a boundary, so the search cannot come up empty.
        (0..=index)
            .rev()
            .find(|&candidate| s.is_char_boundary(candidate))
            .unwrap_or(0)
    }
}
33
/// Stateless tool that performs a web search and returns titles, URLs and
/// snippets as plain text.
///
/// The type carries no data, so it is cheap to copy and can be built with
/// `WebSearchTool` or `WebSearchTool::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct WebSearchTool;
35
36#[derive(Deserialize)]
37struct WebSearchArgs {
38 query: String,
39 #[serde(default = "default_max")]
40 max_results: usize,
41}
42
/// Serde default for `WebSearchArgs::max_results`.
fn default_max() -> usize {
    const DEFAULT_MAX_RESULTS: usize = 8;
    DEFAULT_MAX_RESULTS
}
46
47#[async_trait]
48impl Tool for WebSearchTool {
49 fn definition(&self) -> ToolDef {
50 ToolDef {
51 name: "web_search",
52 description: "Search the web for information. Returns titles, URLs, and snippets.\n\
53 Use when you need to find documentation, look up APIs, research libraries, \
54 or find information not available locally.\n\
55 Examples:\n\
56 - {\"query\": \"openclaw github\"}\n\
57 - {\"query\": \"tailwindcss v4 installation guide\"}\n\
58 - {\"query\": \"rust reqwest POST example\"}"
59 .to_string(),
60 parameters: json!({
61 "type": "object",
62 "properties": {
63 "query": { "type": "string", "description": "Search query" },
64 "max_results": { "type": "integer", "description": "Max results (default 8)" }
65 },
66 "required": ["query"]
67 }),
68 }
69 }
70
71 fn approval(&self, _args: &str) -> ApprovalRequirement {
72 ApprovalRequirement::AutoApprove
73 }
74
75 async fn execute(&self, args: &str, _ctx: &ToolContext) -> Result<ToolResult> {
76 let parsed: WebSearchArgs = serde_json::from_str(args)?;
77 let max = parsed.max_results.min(20);
78
79 let query_encoded = parsed.query.replace(' ', "+");
82 let curl_bin = if cfg!(target_os = "windows") {
83 "curl.exe"
84 } else {
85 "curl"
86 };
87 let mut cmd = Command::new(curl_bin);
88 cmd.args(&[
89 "-s", "-X", "POST",
90 "https://html.duckduckgo.com/html/",
91 "-d", &format!("q={}", query_encoded),
92 "-A", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko)",
93 "--max-time", "15",
94 "-L", ]);
96
97 crate::process_utils::suppress_console_window(&mut cmd);
99
100 let output = cmd.output().await;
101
102 let html = match output {
103 Ok(o) => String::from_utf8_lossy(&o.stdout).to_string(),
104 Err(e) => {
105 return Ok(ToolResult {
106 call_id: String::new(),
107 output: format!("Search failed: {}", e),
108 success: false,
109 });
110 }
111 };
112
113 if html.is_empty() {
114 return Ok(ToolResult {
115 call_id: String::new(),
116 output: format!("Search returned empty response for '{}'", parsed.query),
117 success: false,
118 });
119 }
120
121 let results = parse_ddg_results(&html, max);
122
123 if results.is_empty() {
124 return Ok(ToolResult {
125 call_id: String::new(),
126 output: format!(
127 "No results found for '{}' ({} bytes received)",
128 parsed.query,
129 html.len()
130 ),
131 success: false,
132 });
133 }
134
135 let mut out = format!("Search results for \"{}\":\n\n", parsed.query);
136 for (i, r) in results.iter().enumerate() {
137 out.push_str(&format!(
138 "{}. {}\n {}\n {}\n\n",
139 i + 1,
140 r.title,
141 r.url,
142 r.snippet
143 ));
144 }
145
146 Ok(ToolResult {
147 call_id: String::new(),
148 output: out,
149 success: true,
150 })
151 }
152}
153
/// One parsed entry from a search results page.
#[derive(Debug, Clone, PartialEq, Eq)]
struct SearchResult {
    /// Link text with markup removed.
    title: String,
    /// Destination URL of the result.
    url: String,
    /// Summary text shown under the title; empty when the page has none.
    snippet: String,
}
159
160fn parse_ddg_results(html: &str, max: usize) -> Vec<SearchResult> {
164 let mut results = Vec::new();
165
166 let mut pos = 0;
167 while results.len() < max {
168 let link_marker = "class=\"result__a\"";
170 let safe_pos = ceil_char_boundary(html, pos);
171 let marker_pos = match html[safe_pos..].find(link_marker) {
172 Some(p) => safe_pos + p,
173 None => break,
174 };
175 let after_marker = ceil_char_boundary(html, marker_pos + link_marker.len());
176
177 let tag_start = html[..marker_pos].rfind('<').unwrap_or(marker_pos);
179 let tag_end = html[after_marker..]
181 .find("</a>")
182 .map(|p| after_marker + p)
183 .unwrap_or(after_marker);
184
185 let safe_tag_end_plus4 = ceil_char_boundary(html, tag_end + 4);
186 let tag_region = &html[tag_start..safe_tag_end_plus4]; let url = if let Some(hp) = tag_region.find("href=\"") {
190 let hs = hp + 6;
191 let he = tag_region[hs..].find('"').map(|e| hs + e).unwrap_or(hs);
192 extract_ddg_url(&tag_region[hs..he])
193 } else {
194 pos = safe_tag_end_plus4;
195 continue;
196 };
197
198 let content_start = html[after_marker..tag_end]
200 .find('>')
201 .map(|p| after_marker + p + 1)
202 .unwrap_or(after_marker);
203 let safe_content_start = ceil_char_boundary(html, content_start);
204 let safe_tag_end = floor_char_boundary(html, tag_end);
205 let title = if safe_content_start <= safe_tag_end {
206 strip_html_tags(&html[safe_content_start..safe_tag_end])
207 } else {
208 String::new()
209 };
210
211 let snippet_marker = "class=\"result__snippet\"";
213 let search_end = ceil_char_boundary(html, (tag_end + 2000).min(html.len()));
214 let safe_tag_end2 = ceil_char_boundary(html, tag_end);
215 let snippet = if let Some(sp) = html[safe_tag_end2..search_end].find(snippet_marker) {
216 let snippet_pos = safe_tag_end2 + sp;
217 let s_start = ceil_char_boundary(
218 html,
219 html[snippet_pos..]
220 .find('>')
221 .map(|p| snippet_pos + p + 1)
222 .unwrap_or(snippet_pos),
223 );
224 let s_end = floor_char_boundary(
225 html,
226 html[s_start..]
227 .find("</a>")
228 .map(|p| s_start + p)
229 .unwrap_or(s_start),
230 );
231 if s_start <= s_end {
232 strip_html_tags(&html[s_start..s_end])
233 } else {
234 String::new()
235 }
236 } else {
237 String::new()
238 };
239
240 if !title.trim().is_empty() && !url.is_empty() && url.starts_with("http") {
241 results.push(SearchResult {
242 title: title.trim().to_string(),
243 url,
244 snippet: snippet.trim().to_string(),
245 });
246 }
247
248 pos = ceil_char_boundary(html, tag_end + 4);
249 }
250
251 results
252}
253
254fn extract_ddg_url(raw: &str) -> String {
256 if let Some(uddg_pos) = raw.find("uddg=") {
258 let start = uddg_pos + 5;
259 let end = raw[start..]
260 .find('&')
261 .map(|e| start + e)
262 .unwrap_or(raw.len());
263 let encoded = &raw[start..end];
264 url_decode(encoded)
265 } else if raw.starts_with("http") {
266 raw.to_string()
267 } else if raw.starts_with("//") {
268 format!("https:{}", raw)
269 } else {
270 raw.to_string()
271 }
272}
273
/// Decodes a form/percent-encoded string.
///
/// `%XX` escapes are decoded to raw bytes and the byte sequence is then
/// interpreted as UTF-8, so multi-byte characters such as `%C3%A9` ("é")
/// decode correctly; invalid UTF-8 is replaced with U+FFFD. `+` decodes to a
/// space. A `%` that is not followed by exactly two hex digits is kept
/// literally.
fn url_decode(s: &str) -> String {
    /// Combines two ASCII hex digits into the byte they encode.
    fn hex_pair(hi: u8, lo: u8) -> Option<u8> {
        let hi = char::from(hi).to_digit(16)?;
        let lo = char::from(lo).to_digit(16)?;
        // Both digits are < 16, so the combined value always fits in a u8.
        Some(((hi << 4) | lo) as u8)
    }

    let bytes = s.as_bytes();
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut i = 0;

    while i < bytes.len() {
        let byte = bytes[i];
        if byte == b'%' {
            let escaped = bytes
                .get(i + 1..i + 3)
                .and_then(|pair| hex_pair(pair[0], pair[1]));
            if let Some(value) = escaped {
                decoded.push(value);
                i += 3;
                continue;
            }
        }
        decoded.push(if byte == b'+' { b' ' } else { byte });
        i += 1;
    }

    String::from_utf8_lossy(&decoded).into_owned()
}
295
/// Removes HTML tags from `s` and decodes the character references that
/// remain in the text.
///
/// Everything between `<` and `>` is dropped. Afterwards the named entities
/// `amp`, `lt`, `gt`, `quot`, `apos` and `nbsp` (as a plain space) and numeric
/// references (`&#39;`, `&#x27;`) are decoded in a single pass, so text that
/// was escaped twice (e.g. `&amp;lt;`) is unescaped exactly once.
/// Unrecognised references are left untouched.
fn strip_html_tags(s: &str) -> String {
    let mut text = String::with_capacity(s.len());
    let mut in_tag = false;
    for c in s.chars() {
        match c {
            '<' => in_tag = true,
            '>' => in_tag = false,
            _ if !in_tag => text.push(c),
            _ => {}
        }
    }
    decode_html_entities(&text)
}

/// Decodes character references in `text` in one left-to-right pass.
fn decode_html_entities(text: &str) -> String {
    // Longest reference body considered between '&' and ';'; keeps a stray
    // '&' from pairing with a far-away ';'.
    const MAX_REFERENCE_LEN: usize = 10;

    let mut out = String::with_capacity(text.len());
    let mut rest = text;

    while let Some(amp) = rest.find('&') {
        out.push_str(&rest[..amp]);
        let after = &rest[amp + 1..];

        let decoded = after
            .find(';')
            .filter(|&semi| semi <= MAX_REFERENCE_LEN)
            .and_then(|semi| decode_entity(&after[..semi]).map(|ch| (ch, semi)));

        match decoded {
            Some((ch, semi)) => {
                out.push(ch);
                rest = &after[semi + 1..];
            }
            None => {
                // Not a reference we understand: keep the '&' as-is.
                out.push('&');
                rest = after;
            }
        }
    }

    out.push_str(rest);
    out
}

/// Maps the body of a character reference (the text between `&` and `;`) to
/// the character it denotes, or `None` when it is not recognised.
fn decode_entity(name: &str) -> Option<char> {
    match name {
        "amp" => Some('&'),
        "lt" => Some('<'),
        "gt" => Some('>'),
        "quot" => Some('"'),
        "apos" => Some('\''),
        "nbsp" => Some(' '),
        _ => {
            let digits = name.strip_prefix('#')?;
            let (radix, digits) = match digits
                .strip_prefix('x')
                .or_else(|| digits.strip_prefix('X'))
            {
                Some(hex) => (16, hex),
                None => (10, digits),
            };
            // Reject signs and other characters `from_str_radix` would accept.
            if !digits.chars().all(|c| c.is_digit(radix)) {
                return None;
            }
            u32::from_str_radix(digits, radix)
                .ok()
                .and_then(char::from_u32)
        }
    }
}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// Two well-formed results are parsed with title, URL and snippet.
    #[test]
    fn test_parse_ddg_results() {
        let html = r#"
        <h2 class="result__title">
          <a rel="nofollow" class="result__a" href="https://github.com/openclaw">openclaw · GitHub</a>
        </h2>
        <a class="result__snippet" href="https://github.com/openclaw">Your personal AI assistant. openclaw has 23 repos.</a>
        <h2 class="result__title">
          <a rel="nofollow" class="result__a" href="https://openclaw.ai/">OpenClaw — Personal AI</a>
        </h2>
        <a class="result__snippet" href="https://openclaw.ai/">The AI that does things.</a>
        "#;
        let results = parse_ddg_results(html, 10);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].title, "openclaw · GitHub");
        assert_eq!(results[0].url, "https://github.com/openclaw");
        assert!(results[0].snippet.contains("23 repos"));
        assert_eq!(results[1].title, "OpenClaw — Personal AI");
        assert_eq!(results[1].url, "https://openclaw.ai/");
    }

    /// Parsing stops once `max` results have been collected.
    #[test]
    fn test_parse_ddg_respects_max() {
        let html = r#"
        <a class="result__a" href="https://a.example/">First</a>
        <a class="result__snippet" href="https://a.example/">One.</a>
        <a class="result__a" href="https://b.example/">Second</a>
        <a class="result__snippet" href="https://b.example/">Two.</a>
        "#;
        let results = parse_ddg_results(html, 1);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].url, "https://a.example/");
    }

    /// Redirect links carry the destination in the `uddg` query parameter.
    #[test]
    fn test_parse_ddg_redirect_url() {
        let html = r#"
        <a class="result__a" href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fdocs&amp;rut=abc">Example Docs</a>
        <a class="result__snippet" href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fdocs&amp;rut=abc">Docs.</a>
        "#;
        let results = parse_ddg_results(html, 10);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].title, "Example Docs");
        assert_eq!(results[0].url, "https://example.com/docs");
    }

    #[test]
    fn test_parse_ddg_empty() {
        let results = parse_ddg_results("<html><body>no results</body></html>", 10);
        assert!(results.is_empty());
    }

    #[test]
    fn test_strip_html_tags() {
        assert_eq!(strip_html_tags("hello <b>world</b>"), "hello world");
        // The input must be entity-encoded: a literal '<' would be treated as
        // the start of a tag and stripped.
        assert_eq!(strip_html_tags("&amp; &lt;"), "& <");
    }
}