1use super::traits::{Tool, ToolResult};
2use super::web_search_provider_routing::{WebSearchProviderRoute, resolve_web_search_provider};
3use async_trait::async_trait;
4use regex::Regex;
5use serde_json::json;
6use std::path::{Path, PathBuf};
7use std::time::Duration;
8
/// Web search tool that can query DuckDuckGo, Brave or a SearXNG instance.
pub struct WebSearchTool {
    /// Provider name; the constructors store it trimmed and lowercased.
    provider: String,
    /// Brave API key handed in at construction time. It is used directly only
    /// when it is non-empty and not in encrypted form; otherwise the key is
    /// re-read from the config file.
    boot_brave_api_key: Option<String>,
    /// SearXNG base URL handed in at construction time, if any. When absent or
    /// empty the URL is looked up in the config file instead.
    searxng_instance_url: Option<String>,
    /// Maximum number of results to list; the constructors clamp it to 1..=10.
    max_results: usize,
    /// HTTP request timeout in seconds; the constructors enforce a minimum of 1.
    timeout_secs: u64,
    /// Path of the config file that is re-read to resolve the Brave API key or
    /// the SearXNG instance URL.
    config_path: PathBuf,
    /// Flag forwarded to `SecretStore::new` when an encrypted Brave key has to
    /// be decrypted.
    secrets_encrypt: bool,
}
31
32impl WebSearchTool {
33 pub fn new(
34 provider: String,
35 brave_api_key: Option<String>,
36 max_results: usize,
37 timeout_secs: u64,
38 ) -> Self {
39 Self {
40 provider: provider.trim().to_lowercase(),
41 boot_brave_api_key: brave_api_key,
42 searxng_instance_url: None,
43 max_results: max_results.clamp(1, 10),
44 timeout_secs: timeout_secs.max(1),
45 config_path: PathBuf::new(),
46 secrets_encrypt: false,
47 }
48 }
49
50 pub fn new_with_config(
56 provider: String,
57 brave_api_key: Option<String>,
58 searxng_instance_url: Option<String>,
59 max_results: usize,
60 timeout_secs: u64,
61 config_path: PathBuf,
62 secrets_encrypt: bool,
63 ) -> Self {
64 Self {
65 provider: provider.trim().to_lowercase(),
66 boot_brave_api_key: brave_api_key,
67 searxng_instance_url,
68 max_results: max_results.clamp(1, 10),
69 timeout_secs: timeout_secs.max(1),
70 config_path,
71 secrets_encrypt,
72 }
73 }
74
75 fn resolve_brave_api_key(&self) -> anyhow::Result<String> {
79 if let Some(ref key) = self.boot_brave_api_key {
81 if !key.is_empty() && !crate::security::SecretStore::is_encrypted(key) {
82 return Ok(key.clone());
83 }
84 }
85
86 self.reload_brave_api_key()
88 }
89
90 fn reload_brave_api_key(&self) -> anyhow::Result<String> {
92 let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
93 anyhow::anyhow!(
94 "Failed to read config file {} for Brave API key: {e}",
95 self.config_path.display()
96 )
97 })?;
98
99 let config: crate::config::Config = toml::from_str(&contents).map_err(|e| {
100 anyhow::anyhow!(
101 "Failed to parse config file {} for Brave API key: {e}",
102 self.config_path.display()
103 )
104 })?;
105
106 let raw_key = config
107 .web_search
108 .brave_api_key
109 .filter(|k| !k.is_empty())
110 .ok_or_else(|| anyhow::anyhow!("Brave API key not configured"))?;
111
112 if crate::security::SecretStore::is_encrypted(&raw_key) {
114 let construct_dir = self.config_path.parent().unwrap_or_else(|| Path::new("."));
115 let store = crate::security::SecretStore::new(construct_dir, self.secrets_encrypt);
116 let plaintext = store.decrypt(&raw_key)?;
117 if plaintext.is_empty() {
118 anyhow::bail!("Brave API key not configured (decrypted value is empty)");
119 }
120 Ok(plaintext)
121 } else {
122 Ok(raw_key)
123 }
124 }
125
126 async fn search_duckduckgo(&self, query: &str) -> anyhow::Result<String> {
127 let encoded_query = urlencoding::encode(query);
128 let search_url = format!("https://html.duckduckgo.com/html/?q={}", encoded_query);
129
130 let builder = reqwest::Client::builder()
131 .timeout(Duration::from_secs(self.timeout_secs))
132 .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
133 let builder = crate::config::apply_runtime_proxy_to_builder(builder, "tool.web_search");
134 let client = builder.build()?;
135
136 let response = client.get(&search_url).send().await?;
137
138 if !response.status().is_success() {
139 anyhow::bail!(
140 "DuckDuckGo search failed with status: {}",
141 response.status()
142 );
143 }
144
145 let html = response.text().await?;
146 self.parse_duckduckgo_results(&html, query)
147 }
148
149 fn parse_duckduckgo_results(&self, html: &str, query: &str) -> anyhow::Result<String> {
150 let link_regex = Regex::new(
152 r#"<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>"#,
153 )?;
154
155 let snippet_regex = Regex::new(r#"<a class="result__snippet[^"]*"[^>]*>([\s\S]*?)</a>"#)?;
157
158 let link_matches: Vec<_> = link_regex
159 .captures_iter(html)
160 .take(self.max_results + 2)
161 .collect();
162
163 let snippet_matches: Vec<_> = snippet_regex
164 .captures_iter(html)
165 .take(self.max_results + 2)
166 .collect();
167
168 if link_matches.is_empty() {
169 return Ok(format!("No results found for: {}", query));
170 }
171
172 let mut lines = vec![format!("Search results for: {} (via DuckDuckGo)", query)];
173
174 let count = link_matches.len().min(self.max_results);
175
176 for i in 0..count {
177 let caps = &link_matches[i];
178 let url_str = decode_ddg_redirect_url(&caps[1]);
179 let title = strip_tags(&caps[2]);
180
181 lines.push(format!("{}. {}", i + 1, title.trim()));
182 lines.push(format!(" {}", url_str.trim()));
183
184 if i < snippet_matches.len() {
186 let snippet = strip_tags(&snippet_matches[i][1]);
187 let snippet = snippet.trim();
188 if !snippet.is_empty() {
189 lines.push(format!(" {}", snippet));
190 }
191 }
192 }
193
194 Ok(lines.join("\n"))
195 }
196
197 async fn search_brave(&self, query: &str) -> anyhow::Result<String> {
198 let api_key = self.resolve_brave_api_key()?;
199
200 let encoded_query = urlencoding::encode(query);
201 let search_url = format!(
202 "https://api.search.brave.com/res/v1/web/search?q={}&count={}",
203 encoded_query, self.max_results
204 );
205
206 let builder = reqwest::Client::builder().timeout(Duration::from_secs(self.timeout_secs));
207 let builder = crate::config::apply_runtime_proxy_to_builder(builder, "tool.web_search");
208 let client = builder.build()?;
209
210 let response = client
211 .get(&search_url)
212 .header("Accept", "application/json")
213 .header("X-Subscription-Token", &api_key)
214 .send()
215 .await?;
216
217 if !response.status().is_success() {
218 anyhow::bail!("Brave search failed with status: {}", response.status());
219 }
220
221 let json: serde_json::Value = response.json().await?;
222 self.parse_brave_results(&json, query)
223 }
224
225 fn parse_brave_results(&self, json: &serde_json::Value, query: &str) -> anyhow::Result<String> {
226 let results = json
227 .get("web")
228 .and_then(|w| w.get("results"))
229 .and_then(|r| r.as_array())
230 .ok_or_else(|| anyhow::anyhow!("Invalid Brave API response"))?;
231
232 if results.is_empty() {
233 return Ok(format!("No results found for: {}", query));
234 }
235
236 let mut lines = vec![format!("Search results for: {} (via Brave)", query)];
237
238 for (i, result) in results.iter().take(self.max_results).enumerate() {
239 let title = result
240 .get("title")
241 .and_then(|t| t.as_str())
242 .unwrap_or("No title");
243 let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
244 let description = result
245 .get("description")
246 .and_then(|d| d.as_str())
247 .unwrap_or("");
248
249 lines.push(format!("{}. {}", i + 1, title));
250 lines.push(format!(" {}", url));
251 if !description.is_empty() {
252 lines.push(format!(" {}", description));
253 }
254 }
255
256 Ok(lines.join("\n"))
257 }
258
259 fn resolve_searxng_instance_url(&self) -> anyhow::Result<String> {
262 if let Some(ref url) = self.searxng_instance_url {
263 if !url.is_empty() {
264 return Ok(url.clone());
265 }
266 }
267
268 let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
270 anyhow::anyhow!(
271 "Failed to read config file {} for SearXNG instance URL: {e}",
272 self.config_path.display()
273 )
274 })?;
275
276 let config: crate::config::Config = toml::from_str(&contents).map_err(|e| {
277 anyhow::anyhow!(
278 "Failed to parse config file {} for SearXNG instance URL: {e}",
279 self.config_path.display()
280 )
281 })?;
282
283 config
284 .web_search
285 .searxng_instance_url
286 .filter(|u| !u.is_empty())
287 .ok_or_else(|| {
288 anyhow::anyhow!(
289 "SearXNG instance URL not configured. Set [web_search] searxng_instance_url \
290 in config.toml or the SEARXNG_INSTANCE_URL environment variable."
291 )
292 })
293 }
294
295 async fn search_searxng(&self, query: &str) -> anyhow::Result<String> {
296 let instance_url = self.resolve_searxng_instance_url()?;
297 let base_url = instance_url.trim_end_matches('/');
298
299 let encoded_query = urlencoding::encode(query);
300 let search_url = format!(
301 "{}/search?q={}&format=json&pageno=1",
302 base_url, encoded_query
303 );
304
305 let builder = reqwest::Client::builder()
306 .timeout(Duration::from_secs(self.timeout_secs))
307 .user_agent("Construct/1.0");
308 let builder = crate::config::apply_runtime_proxy_to_builder(builder, "tool.web_search");
309 let client = builder.build()?;
310
311 let response = client
312 .get(&search_url)
313 .header("Accept", "application/json")
314 .send()
315 .await?;
316
317 if !response.status().is_success() {
318 anyhow::bail!("SearXNG search failed with status: {}", response.status());
319 }
320
321 let json: serde_json::Value = response.json().await?;
322 self.parse_searxng_results(&json, query)
323 }
324
325 fn parse_searxng_results(
326 &self,
327 json: &serde_json::Value,
328 query: &str,
329 ) -> anyhow::Result<String> {
330 let results = json
331 .get("results")
332 .and_then(|r| r.as_array())
333 .ok_or_else(|| anyhow::anyhow!("Invalid SearXNG API response"))?;
334
335 if results.is_empty() {
336 return Ok(format!("No results found for: {}", query));
337 }
338
339 let mut lines = vec![format!("Search results for: {} (via SearXNG)", query)];
340
341 for (i, result) in results.iter().take(self.max_results).enumerate() {
342 let title = result
343 .get("title")
344 .and_then(|t| t.as_str())
345 .unwrap_or("No title");
346 let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
347 let content = result.get("content").and_then(|c| c.as_str()).unwrap_or("");
348
349 lines.push(format!("{}. {}", i + 1, title));
350 lines.push(format!(" {}", url));
351 if !content.is_empty() {
352 lines.push(format!(" {}", content));
353 }
354 }
355
356 Ok(lines.join("\n"))
357 }
358}
359
360fn decode_ddg_redirect_url(raw_url: &str) -> String {
361 if let Some(index) = raw_url.find("uddg=") {
362 let encoded = &raw_url[index + 5..];
363 let encoded = encoded.split('&').next().unwrap_or(encoded);
364 if let Ok(decoded) = urlencoding::decode(encoded) {
365 return decoded.into_owned();
366 }
367 }
368
369 raw_url.to_string()
370}
371
372fn strip_tags(content: &str) -> String {
373 let re = Regex::new(r"<[^>]+>").unwrap();
374 re.replace_all(content, "").to_string()
375}
376
377#[async_trait]
378impl Tool for WebSearchTool {
379 fn name(&self) -> &str {
380 "web_search_tool"
381 }
382
383 fn description(&self) -> &str {
384 "Search the web for information. Returns relevant search results with titles, URLs, and descriptions. Use this to find current information, news, or research topics."
385 }
386
387 fn parameters_schema(&self) -> serde_json::Value {
388 json!({
389 "type": "object",
390 "properties": {
391 "query": {
392 "type": "string",
393 "description": "The search query. Be specific for better results."
394 }
395 },
396 "required": ["query"]
397 })
398 }
399
400 async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
401 let query = args
402 .get("query")
403 .and_then(|q| q.as_str())
404 .ok_or_else(|| anyhow::anyhow!("Missing required parameter: query"))?;
405
406 if query.trim().is_empty() {
407 anyhow::bail!("Search query cannot be empty");
408 }
409
410 tracing::info!("Searching web for: {}", query);
411
412 let resolution = resolve_web_search_provider(&self.provider);
413 if resolution.used_fallback {
414 tracing::warn!(
415 "Unknown web search provider '{}'; falling back to '{}'",
416 self.provider,
417 resolution.canonical_provider
418 );
419 }
420
421 let result = match resolution.route {
422 WebSearchProviderRoute::DuckDuckGo => self.search_duckduckgo(query).await?,
423 WebSearchProviderRoute::Brave => self.search_brave(query).await?,
424 WebSearchProviderRoute::SearXNG => self.search_searxng(query).await?,
425 };
426
427 Ok(ToolResult {
428 success: true,
429 output: result,
430 error: None,
431 })
432 }
433}
434
#[cfg(test)]
mod tests {
    use super::*;

    /// DuckDuckGo tool with the limits used throughout these tests.
    fn duckduckgo_tool() -> WebSearchTool {
        WebSearchTool::new("duckduckgo".to_string(), None, 5, 15)
    }

    /// SearXNG tool without an instance URL or config file.
    fn searxng_tool() -> WebSearchTool {
        WebSearchTool::new("searxng".to_string(), None, 5, 15)
    }

    /// Writes `contents` to `config.toml` in a fresh temp dir. The returned
    /// guard must stay alive for as long as the file is needed.
    fn config_file(contents: &str) -> (tempfile::TempDir, PathBuf) {
        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join("config.toml");
        std::fs::write(&path, contents).unwrap();
        (dir, path)
    }

    /// Tool for `provider` with no boot-time key or URL, backed by `config_path`.
    fn tool_with_config(provider: &str, config_path: PathBuf) -> WebSearchTool {
        WebSearchTool::new_with_config(
            provider.to_string(),
            None,
            None,
            5,
            15,
            config_path,
            false,
        )
    }

    #[test]
    fn test_tool_name() {
        assert_eq!(duckduckgo_tool().name(), "web_search_tool");
    }

    #[test]
    fn test_tool_description() {
        assert!(duckduckgo_tool().description().contains("Search the web"));
    }

    #[test]
    fn test_parameters_schema() {
        let schema = duckduckgo_tool().parameters_schema();
        assert_eq!(schema["type"], "object");
        assert!(schema["properties"]["query"].is_object());
    }

    #[test]
    fn test_strip_tags() {
        assert_eq!(strip_tags("<b>Hello</b> <i>World</i>"), "Hello World");
    }

    #[test]
    fn test_parse_duckduckgo_results_empty() {
        let output = duckduckgo_tool()
            .parse_duckduckgo_results("<html>No results here</html>", "test")
            .unwrap();
        assert!(output.contains("No results found"));
    }

    #[test]
    fn test_parse_duckduckgo_results_with_data() {
        let html = r#"
 <a class="result__a" href="https://example.com">Example Title</a>
 <a class="result__snippet">This is a description</a>
 "#;
        let output = duckduckgo_tool()
            .parse_duckduckgo_results(html, "test")
            .unwrap();
        assert!(output.contains("Example Title"));
        assert!(output.contains("https://example.com"));
    }

    #[test]
    fn test_parse_duckduckgo_results_decodes_redirect_url() {
        let html = r#"
 <a class="result__a" href="https://duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpath%3Fa%3D1&rut=test">Example Title</a>
 <a class="result__snippet">This is a description</a>
 "#;
        let output = duckduckgo_tool()
            .parse_duckduckgo_results(html, "test")
            .unwrap();
        assert!(output.contains("https://example.com/path?a=1"));
        assert!(!output.contains("rut=test"));
    }

    #[test]
    fn test_constructor_clamps_web_search_limits() {
        // Zero limits must be raised so that one result is still listed.
        let tool = WebSearchTool::new("duckduckgo".to_string(), None, 0, 0);
        let html = r#"
 <a class="result__a" href="https://example.com">Example Title</a>
 <a class="result__snippet">This is a description</a>
 "#;
        let output = tool.parse_duckduckgo_results(html, "test").unwrap();
        assert!(output.contains("Example Title"));
    }

    #[tokio::test]
    async fn test_execute_missing_query() {
        let outcome = duckduckgo_tool().execute(json!({})).await;
        assert!(outcome.is_err());
    }

    #[tokio::test]
    async fn test_execute_empty_query() {
        let outcome = duckduckgo_tool().execute(json!({"query": ""})).await;
        assert!(outcome.is_err());
    }

    #[tokio::test]
    async fn test_execute_brave_without_api_key() {
        let tool = WebSearchTool::new("brave".to_string(), None, 5, 15);
        let outcome = tool.execute(json!({"query": "test"})).await;
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("API key"));
    }

    #[test]
    fn test_resolve_brave_api_key_uses_boot_key() {
        let boot_key = Some("sk-plaintext-key".to_string());
        let tool = WebSearchTool::new("brave".to_string(), boot_key, 5, 15);
        assert_eq!(tool.resolve_brave_api_key().unwrap(), "sk-plaintext-key");
    }

    #[test]
    fn test_resolve_brave_api_key_reloads_from_config() {
        let (_tmp, config_path) =
            config_file("[web_search]\nbrave_api_key = \"fresh-key-from-disk\"\n");

        let tool = tool_with_config("brave", config_path);
        assert_eq!(tool.resolve_brave_api_key().unwrap(), "fresh-key-from-disk");
    }

    #[test]
    fn test_resolve_brave_api_key_decrypts_encrypted_key() {
        let tmp = tempfile::TempDir::new().unwrap();
        let encrypted = crate::security::SecretStore::new(tmp.path(), true)
            .encrypt("brave-secret-key")
            .unwrap();

        let config_path = tmp.path().join("config.toml");
        let config_body = format!("[web_search]\nbrave_api_key = \"{}\"\n", encrypted);
        std::fs::write(&config_path, config_body).unwrap();

        // The boot key is the encrypted form, so resolution has to fall back
        // to the config file and decrypt it.
        let tool = WebSearchTool::new_with_config(
            "brave".to_string(),
            Some(encrypted),
            None,
            5,
            15,
            config_path,
            true,
        );
        assert_eq!(tool.resolve_brave_api_key().unwrap(), "brave-secret-key");
    }

    #[tokio::test]
    async fn test_execute_searxng_without_instance_url() {
        let (_tmp, config_path) = config_file("[web_search]\n");

        let tool = tool_with_config("searxng", config_path);
        let outcome = tool.execute(json!({"query": "test"})).await;
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("SearXNG instance URL not configured"));
    }

    #[test]
    fn test_parse_searxng_results_empty() {
        let payload = serde_json::json!({"results": []});
        let output = searxng_tool()
            .parse_searxng_results(&payload, "test")
            .unwrap();
        assert!(output.contains("No results found"));
    }

    #[test]
    fn test_parse_searxng_results_with_data() {
        let payload = serde_json::json!({
            "results": [
                {
                    "title": "SearXNG Example",
                    "url": "https://example.com",
                    "content": "A privacy-respecting metasearch engine"
                },
                {
                    "title": "Another Result",
                    "url": "https://example.org",
                    "content": "More information here"
                }
            ]
        });
        let output = searxng_tool()
            .parse_searxng_results(&payload, "test")
            .unwrap();
        assert!(output.contains("SearXNG Example"));
        assert!(output.contains("https://example.com"));
        assert!(output.contains("A privacy-respecting metasearch engine"));
        assert!(output.contains("via SearXNG"));
    }

    #[test]
    fn test_parse_searxng_results_invalid_response() {
        let payload = serde_json::json!({"error": "bad request"});
        let outcome = searxng_tool().parse_searxng_results(&payload, "test");
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Invalid SearXNG API response"));
    }

    #[test]
    fn test_resolve_searxng_instance_url_from_boot() {
        // Built field by field so the boot-time URL is the only source.
        let tool = WebSearchTool {
            provider: "searxng".to_string(),
            boot_brave_api_key: None,
            searxng_instance_url: Some("https://searx.example.com".to_string()),
            max_results: 5,
            timeout_secs: 15,
            config_path: PathBuf::new(),
            secrets_encrypt: false,
        };
        assert_eq!(
            tool.resolve_searxng_instance_url().unwrap(),
            "https://searx.example.com"
        );
    }

    #[test]
    fn test_resolve_searxng_instance_url_reloads_from_config() {
        let (_tmp, config_path) =
            config_file("[web_search]\nsearxng_instance_url = \"https://search.local\"\n");

        let tool = tool_with_config("searxng", config_path);
        assert_eq!(
            tool.resolve_searxng_instance_url().unwrap(),
            "https://search.local"
        );
    }

    #[test]
    fn test_resolve_brave_api_key_picks_up_runtime_update() {
        let (_tmp, config_path) = config_file("[web_search]\n");
        let tool = tool_with_config("brave", config_path.clone());

        // No key in the file yet.
        assert!(tool.resolve_brave_api_key().is_err());

        // A key written after construction must be picked up by the same tool.
        std::fs::write(
            &config_path,
            "[web_search]\nbrave_api_key = \"runtime-updated-key\"\n",
        )
        .unwrap();

        assert_eq!(tool.resolve_brave_api_key().unwrap(), "runtime-updated-key");
    }
}