matrixcode_core/tools/websearch/
mod.rs1mod backends;
6mod client;
7mod parser;
8
9use anyhow::Result;
10use async_trait::async_trait;
11use serde::{Deserialize, Serialize};
12use serde_json::{Value, json};
13use std::time::Duration;
14
15use super::{Tool, ToolDefinition};
16use client::{create_client, load_proxy_from_env};
17use parser::SearchResult;
18
19pub use parser::{SearchResultParser, clean_url};
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct WebSearchConfig {
24 pub proxy: Option<String>,
26 pub timeout_secs: u64,
28 pub max_retries: u32,
30 pub enable_fallback: bool,
32}
33
34impl Default for WebSearchConfig {
35 fn default() -> Self {
36 Self {
37 proxy: None,
38 timeout_secs: 30,
39 max_retries: 3,
40 enable_fallback: true,
41 }
42 }
43}
44
45pub struct WebSearchTool {
47 config: WebSearchConfig,
48}
49
50impl Default for WebSearchTool {
51 fn default() -> Self {
52 Self::new()
53 }
54}
55
56impl WebSearchTool {
57 pub fn new() -> Self {
58 Self {
59 config: WebSearchConfig::default(),
60 }
61 }
62
63 pub fn with_config(config: WebSearchConfig) -> Self {
64 Self { config }
65 }
66
67 async fn search_with_retry(
69 &self,
70 query: &str,
71 max_results: usize,
72 ) -> Result<Vec<SearchResult>> {
73 let client = create_client(self.config.proxy.as_deref(), self.config.timeout_secs)?;
74 let mut last_error: Option<anyhow::Error> = None;
75
76 for attempt in 0..self.config.max_retries {
77 if attempt > 0 {
78 let delay = Duration::from_secs(1 << (attempt - 1));
79 tokio::time::sleep(delay).await;
80 log::info!(
81 "WebSearch retry attempt {} after {}s delay",
82 attempt + 1,
83 delay.as_secs()
84 );
85 }
86
87 match backends::search_duckduckgo(&client, query, max_results).await {
88 Ok(results) if !results.is_empty() => {
89 log::info!("WebSearch succeeded on attempt {}", attempt + 1);
90 return Ok(results);
91 }
92 Ok(_) => {
93 log::warn!(
94 "WebSearch returned empty results on attempt {}",
95 attempt + 1
96 );
97 last_error = Some(anyhow::anyhow!("No search results found"));
98 }
99 Err(e) => {
100 log::warn!("WebSearch failed on attempt {}: {}", attempt + 1, e);
101 last_error = Some(e);
102 }
103 }
104 }
105
106 if self.config.enable_fallback {
108 log::info!("Trying fallback search backends...");
109
110 if let Ok(results) = backends::search_wikipedia(&client, query, max_results).await
111 && !results.is_empty()
112 {
113 log::info!("Fallback search succeeded via Wikipedia");
114 return Ok(results);
115 }
116
117 if let Ok(results) = backends::search_searxng(&client, query, max_results).await
118 && !results.is_empty()
119 {
120 log::info!("Fallback search succeeded via SearXNG");
121 return Ok(results);
122 }
123 }
124
125 Err(last_error.unwrap_or_else(|| {
126 anyhow::anyhow!("WebSearch failed after {} retries", self.config.max_retries)
127 }))
128 }
129}
130
131#[async_trait]
132impl Tool for WebSearchTool {
133 fn definition(&self) -> ToolDefinition {
134 ToolDefinition {
135 name: "websearch".to_string(),
136 description: "使用 DuckDuckGo 搜索网络信息。返回包含标题、URL 和摘要的搜索结果列表。用于查找互联网上的最新信息。支持代理和自动重试。".to_string(),
137 parameters: json!({
138 "type": "object",
139 "properties": {
140 "query": {
141 "type": "string",
142 "description": "搜索查询"
143 },
144 "max_results": {
145 "type": "integer",
146 "description": "最大返回结果数(默认 5,最大 10)"
147 },
148 "use_proxy": {
149 "type": "boolean",
150 "description": "是否使用代理(默认自动检测环境变量 HTTP_PROXY)"
151 }
152 },
153 "required": ["query"]
154 }),
155 ..Default::default()
156 }
157 }
158
159 async fn execute(&self, params: Value) -> Result<String> {
160 let query = params["query"]
161 .as_str()
162 .ok_or_else(|| anyhow::anyhow!("missing 'query' parameter"))?;
163 let max_results = params["max_results"].as_u64().unwrap_or(5).min(10) as usize;
164 let use_proxy = params["use_proxy"].as_bool().unwrap_or(true);
165
166 let mut config = self.config.clone();
167 if use_proxy && config.proxy.is_none() {
168 config.proxy = load_proxy_from_env();
169 if config.proxy.is_some() {
170 log::info!("WebSearch using proxy from environment: {:?}", config.proxy);
171 }
172 }
173
174 let tool = Self::with_config(config);
175 let results = tool.search_with_retry(query, max_results).await?;
176
177 if results.is_empty() {
178 return Ok("No results found. Suggestions:\n1. Check your network connection\n2. Try enabling proxy (set HTTP_PROXY env var)\n3. Try a different query".to_string());
179 }
180
181 let output = results
182 .iter()
183 .enumerate()
184 .map(|(i, r)| {
185 let mut s = format!("{}. {}\n {}", i + 1, r.title, r.url);
186 if let Some(ref snippet) = r.snippet {
187 s.push_str(&format!("\n {}", snippet));
188 }
189 s
190 })
191 .collect::<Vec<_>>()
192 .join("\n\n");
193
194 Ok(output)
195 }
196}
197
#[cfg(test)]
mod tests {
    use super::*;
    use parser::{clean_url, strip_html_tags};

    /// Tags are removed and surrounding whitespace is trimmed.
    #[test]
    fn test_strip_html_tags() {
        // NOTE(review): the second case has identical input and output; the
        // input may originally have contained an HTML entity - confirm.
        let cases = [
            ("<b>hello</b>", "hello"),
            ("a & b", "a & b"),
            (" <span>test</span> ", "test"),
        ];

        for (input, expected) in cases {
            assert_eq!(strip_html_tags(input), expected);
        }
    }

    /// DuckDuckGo redirect links are unwrapped; plain URLs pass through.
    #[test]
    fn test_clean_url() {
        let cases = [
            (
                "https://duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com&rut=abc",
                "https://example.com",
            ),
            ("https://example.com/page", "https://example.com/page"),
        ];

        for (raw, cleaned) in cases {
            assert_eq!(clean_url(raw), cleaned);
        }
    }

    /// The default configuration carries the documented stock values.
    #[test]
    fn test_config_default() {
        let WebSearchConfig {
            timeout_secs,
            max_retries,
            enable_fallback,
            ..
        } = WebSearchConfig::default();

        assert_eq!(timeout_secs, 30);
        assert_eq!(max_retries, 3);
        assert!(enable_fallback);
    }
}
227
#[cfg(test)]
mod integration_tests {
    use super::*;
    use tokio;

    /// End-to-end search against the real backends; ignored by default
    /// because it needs network access.
    #[tokio::test]
    #[ignore]
    async fn test_real_websearch_full() {
        let tool = WebSearchTool::new();
        let params = json!({
            "query": "Rust programming",
            "max_results": 5
        });

        let result = match tool.execute(params).await {
            Ok(text) => text,
            Err(e) => {
                eprintln!("Error: {:?}", e);
                panic!("Websearch should succeed with Wikipedia fallback");
            }
        };

        println!("Full websearch result:\n{}", result);
        assert!(
            !result.contains("No results found"),
            "Should find results via Wikipedia fallback"
        );
    }
}