// matrixcode_core/tools/websearch/mod.rs
mod backends;
6mod client;
7mod parser;
8
9use anyhow::Result;
10use async_trait::async_trait;
11use serde::{Deserialize, Serialize};
12use serde_json::{Value, json};
13use std::time::Duration;
14
15use super::{Tool, ToolDefinition};
16use client::{create_client, load_proxy_from_env};
17use parser::SearchResult;
18
19pub use parser::{SearchResultParser, clean_url};
20
/// Request timeout, in seconds, used when neither the configuration nor the
/// tool call supplies one.
const DEFAULT_TIMEOUT_SECS: u64 = 30;
/// Upper bound, in seconds, applied to the `timeout_secs` tool parameter.
const MAX_TIMEOUT_SECS: u64 = 120;
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct WebSearchConfig {
27 pub proxy: Option<String>,
29 pub timeout_secs: u64,
31 pub max_retries: u32,
33 pub enable_fallback: bool,
35}
36
37impl Default for WebSearchConfig {
38 fn default() -> Self {
39 Self {
40 proxy: None,
41 timeout_secs: DEFAULT_TIMEOUT_SECS,
42 max_retries: 3,
43 enable_fallback: true,
44 }
45 }
46}
47
48pub struct WebSearchTool {
50 config: WebSearchConfig,
51}
52
53impl Default for WebSearchTool {
54 fn default() -> Self {
55 Self::new()
56 }
57}
58
59impl WebSearchTool {
60 pub fn new() -> Self {
61 Self {
62 config: WebSearchConfig::default(),
63 }
64 }
65
66 pub fn with_config(config: WebSearchConfig) -> Self {
67 Self { config }
68 }
69
70 async fn search_with_retry(
72 &self,
73 query: &str,
74 max_results: usize,
75 ) -> Result<Vec<SearchResult>> {
76 let client = create_client(self.config.proxy.as_deref(), self.config.timeout_secs)?;
77 let mut last_error: Option<anyhow::Error> = None;
78
79 for attempt in 0..self.config.max_retries {
80 if attempt > 0 {
81 let delay = Duration::from_secs(1 << (attempt - 1));
82 tokio::time::sleep(delay).await;
83 log::info!(
84 "WebSearch retry attempt {} after {}s delay",
85 attempt + 1,
86 delay.as_secs()
87 );
88 }
89
90 match backends::search_duckduckgo(&client, query, max_results).await {
91 Ok(results) if !results.is_empty() => {
92 log::info!("WebSearch succeeded on attempt {}", attempt + 1);
93 return Ok(results);
94 }
95 Ok(_) => {
96 log::warn!(
97 "WebSearch returned empty results on attempt {}",
98 attempt + 1
99 );
100 last_error = Some(anyhow::anyhow!("No search results found"));
101 }
102 Err(e) => {
103 log::warn!("WebSearch failed on attempt {}: {}", attempt + 1, e);
104 last_error = Some(e);
105 }
106 }
107 }
108
109 if self.config.enable_fallback {
111 log::info!("Trying fallback search backends...");
112
113 if let Ok(results) = backends::search_wikipedia(&client, query, max_results).await
114 && !results.is_empty()
115 {
116 log::info!("Fallback search succeeded via Wikipedia");
117 return Ok(results);
118 }
119
120 if let Ok(results) = backends::search_searxng(&client, query, max_results).await
121 && !results.is_empty()
122 {
123 log::info!("Fallback search succeeded via SearXNG");
124 return Ok(results);
125 }
126 }
127
128 Err(last_error.unwrap_or_else(|| {
129 anyhow::anyhow!("WebSearch failed after {} retries", self.config.max_retries)
130 }))
131 }
132}
133
134#[async_trait]
135impl Tool for WebSearchTool {
136 fn definition(&self) -> ToolDefinition {
137 ToolDefinition {
138 name: "websearch".to_string(),
139 description: "使用 DuckDuckGo 搜索网络信息。返回包含标题、URL 和摘要的搜索结果列表。用于查找互联网上的最新信息。支持代理、自动重试和自定义超时。".to_string(),
140 parameters: json!({
141 "type": "object",
142 "properties": {
143 "query": {
144 "type": "string",
145 "description": "搜索查询"
146 },
147 "max_results": {
148 "type": "integer",
149 "description": "最大返回结果数(默认 5,最大 10)"
150 },
151 "use_proxy": {
152 "type": "boolean",
153 "description": "是否使用代理(默认自动检测环境变量 HTTP_PROXY)"
154 },
155 "timeout_secs": {
156 "type": "integer",
157 "description": format!("超时时间(秒,默认 {},最大 {})", DEFAULT_TIMEOUT_SECS, MAX_TIMEOUT_SECS)
158 }
159 },
160 "required": ["query"]
161 }),
162 ..Default::default()
163 }
164 }
165
166 async fn execute(&self, params: Value) -> Result<String> {
167 let query = params["query"]
168 .as_str()
169 .ok_or_else(|| anyhow::anyhow!("missing 'query' parameter"))?;
170 let max_results = params["max_results"].as_u64().unwrap_or(5).min(10) as usize;
171 let use_proxy = params["use_proxy"].as_bool().unwrap_or(true);
172 let timeout_secs = params["timeout_secs"]
173 .as_u64()
174 .unwrap_or(DEFAULT_TIMEOUT_SECS)
175 .min(MAX_TIMEOUT_SECS);
176
177 let mut config = self.config.clone();
178 config.timeout_secs = timeout_secs;
179 if use_proxy && config.proxy.is_none() {
180 config.proxy = load_proxy_from_env();
181 if config.proxy.is_some() {
182 log::info!("WebSearch using proxy from environment: {:?}", config.proxy);
183 }
184 }
185
186 let tool = Self::with_config(config);
187 let results = tool.search_with_retry(query, max_results).await?;
188
189 if results.is_empty() {
190 return Ok("No results found. Suggestions:\n1. Check your network connection\n2. Try enabling proxy (set HTTP_PROXY env var)\n3. Try a different query".to_string());
191 }
192
193 let output = results
194 .iter()
195 .enumerate()
196 .map(|(i, r)| {
197 let mut s = format!("{}. {}\n {}", i + 1, r.title, r.url);
198 if let Some(ref snippet) = r.snippet {
199 s.push_str(&format!("\n {}", snippet));
200 }
201 s
202 })
203 .collect::<Vec<_>>()
204 .join("\n\n");
205
206 Ok(output)
207 }
208}
209
#[cfg(test)]
mod tests {
    use super::*;
    use parser::{clean_url, strip_html_tags};

    /// Tags are removed and surrounding whitespace is trimmed; plain text is
    /// returned unchanged.
    #[test]
    fn test_strip_html_tags() {
        let cases = [
            ("<b>hello</b>", "hello"),
            ("a & b", "a & b"),
            (" <span>test</span> ", "test"),
        ];

        for (input, expected) in cases {
            assert_eq!(strip_html_tags(input), expected);
        }
    }

    /// DuckDuckGo redirect links are unwrapped to their target; ordinary URLs
    /// pass through untouched.
    #[test]
    fn test_clean_url() {
        let cases = [
            (
                "https://duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com&rut=abc",
                "https://example.com",
            ),
            ("https://example.com/page", "https://example.com/page"),
        ];

        for (input, expected) in cases {
            assert_eq!(clean_url(input), expected);
        }
    }

    /// The default configuration uses a 30s timeout, three attempts and has
    /// fallback backends enabled.
    #[test]
    fn test_config_default() {
        let WebSearchConfig {
            timeout_secs,
            max_retries,
            enable_fallback,
            ..
        } = WebSearchConfig::default();

        assert_eq!(timeout_secs, 30);
        assert_eq!(max_retries, 3);
        assert!(enable_fallback);
    }
}
239
#[cfg(test)]
mod integration_tests {
    use super::*;

    /// End-to-end run of the tool through `execute`. Marked `#[ignore]`,
    /// presumably because it performs a real search; run it explicitly with
    /// `cargo test -- --ignored`.
    #[tokio::test]
    #[ignore]
    async fn test_real_websearch_full() {
        let tool = WebSearchTool::new();
        let params = json!({
            "query": "Rust programming",
            "max_results": 5
        });

        let outcome = tool.execute(params).await;
        let result = outcome.unwrap_or_else(|e| {
            eprintln!("Error: {:?}", e);
            panic!("Websearch should succeed with Wikipedia fallback");
        });

        println!("Full websearch result:\n{}", result);
        assert!(
            !result.contains("No results found"),
            "Should find results via Wikipedia fallback"
        );
    }
}