matrixcode_core/tools/websearch/
mod.rs1mod backends;
6mod client;
7mod parser;
8
9use anyhow::Result;
10use async_trait::async_trait;
11use serde::{Deserialize, Serialize};
12use serde_json::{Value, json};
13use std::time::Duration;
14
15use super::{Tool, ToolDefinition};
16use client::{create_client, load_proxy_from_env};
17use parser::SearchResult;
18
19pub use parser::{SearchResultParser, clean_url};
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct WebSearchConfig {
24 pub proxy: Option<String>,
26 pub timeout_secs: u64,
28 pub max_retries: u32,
30 pub enable_fallback: bool,
32}
33
34impl Default for WebSearchConfig {
35 fn default() -> Self {
36 Self {
37 proxy: None,
38 timeout_secs: 30,
39 max_retries: 3,
40 enable_fallback: true,
41 }
42 }
43}
44
45pub struct WebSearchTool {
47 config: WebSearchConfig,
48}
49
50impl Default for WebSearchTool {
51 fn default() -> Self {
52 Self::new()
53 }
54}
55
56impl WebSearchTool {
57 pub fn new() -> Self {
58 Self { config: WebSearchConfig::default() }
59 }
60
61 pub fn with_config(config: WebSearchConfig) -> Self {
62 Self { config }
63 }
64
65 async fn search_with_retry(&self, query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
67 let client = create_client(self.config.proxy.as_deref(), self.config.timeout_secs)?;
68 let mut last_error: Option<anyhow::Error> = None;
69
70 for attempt in 0..self.config.max_retries {
71 if attempt > 0 {
72 let delay = Duration::from_secs(1 << (attempt - 1));
73 tokio::time::sleep(delay).await;
74 log::info!("WebSearch retry attempt {} after {}s delay", attempt + 1, delay.as_secs());
75 }
76
77 match backends::search_duckduckgo(&client, query, max_results).await {
78 Ok(results) if !results.is_empty() => {
79 log::info!("WebSearch succeeded on attempt {}", attempt + 1);
80 return Ok(results);
81 }
82 Ok(_) => {
83 log::warn!("WebSearch returned empty results on attempt {}", attempt + 1);
84 last_error = Some(anyhow::anyhow!("No search results found"));
85 }
86 Err(e) => {
87 log::warn!("WebSearch failed on attempt {}: {}", attempt + 1, e);
88 last_error = Some(e);
89 }
90 }
91 }
92
93 if self.config.enable_fallback {
95 log::info!("Trying fallback search backends...");
96
97 if let Ok(results) = backends::search_wikipedia(&client, query, max_results).await
98 && !results.is_empty() {
99 log::info!("Fallback search succeeded via Wikipedia");
100 return Ok(results);
101 }
102
103 if let Ok(results) = backends::search_searxng(&client, query, max_results).await
104 && !results.is_empty() {
105 log::info!("Fallback search succeeded via SearXNG");
106 return Ok(results);
107 }
108 }
109
110 Err(last_error
111 .unwrap_or_else(|| anyhow::anyhow!("WebSearch failed after {} retries", self.config.max_retries)))
112 }
113}
114
115#[async_trait]
116impl Tool for WebSearchTool {
117 fn definition(&self) -> ToolDefinition {
118 ToolDefinition {
119 name: "websearch".to_string(),
120 description: "使用 DuckDuckGo 搜索网络信息。返回包含标题、URL 和摘要的搜索结果列表。用于查找互联网上的最新信息。支持代理和自动重试。".to_string(),
121 parameters: json!({
122 "type": "object",
123 "properties": {
124 "query": {
125 "type": "string",
126 "description": "搜索查询"
127 },
128 "max_results": {
129 "type": "integer",
130 "description": "最大返回结果数(默认 5,最大 10)"
131 },
132 "use_proxy": {
133 "type": "boolean",
134 "description": "是否使用代理(默认自动检测环境变量 HTTP_PROXY)"
135 }
136 },
137 "required": ["query"]
138 }),
139 ..Default::default()
140 }
141 }
142
143 async fn execute(&self, params: Value) -> Result<String> {
144 let query = params["query"]
145 .as_str()
146 .ok_or_else(|| anyhow::anyhow!("missing 'query' parameter"))?;
147 let max_results = params["max_results"].as_u64().unwrap_or(5).min(10) as usize;
148 let use_proxy = params["use_proxy"].as_bool().unwrap_or(true);
149
150 let mut config = self.config.clone();
151 if use_proxy && config.proxy.is_none() {
152 config.proxy = load_proxy_from_env();
153 if config.proxy.is_some() {
154 log::info!("WebSearch using proxy from environment: {:?}", config.proxy);
155 }
156 }
157
158 let tool = Self::with_config(config);
159 let results = tool.search_with_retry(query, max_results).await?;
160
161 if results.is_empty() {
162 return Ok("No results found. Suggestions:\n1. Check your network connection\n2. Try enabling proxy (set HTTP_PROXY env var)\n3. Try a different query".to_string());
163 }
164
165 let output = results.iter()
166 .enumerate()
167 .map(|(i, r)| {
168 let mut s = format!("{}. {}\n {}", i + 1, r.title, r.url);
169 if let Some(ref snippet) = r.snippet {
170 s.push_str(&format!("\n {}", snippet));
171 }
172 s
173 })
174 .collect::<Vec<_>>()
175 .join("\n\n");
176
177 Ok(output)
178 }
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use parser::{strip_html_tags, clean_url};

    /// Checks `strip_html_tags` against a small table of input/expected pairs.
    #[test]
    fn test_strip_html_tags() {
        let cases = [
            ("<b>hello</b>", "hello"),
            ("a & b", "a & b"),
            (" <span>test</span> ", "test"),
        ];
        for (html, expected) in cases {
            assert_eq!(strip_html_tags(html), expected);
        }
    }

    /// Checks `clean_url` on a DuckDuckGo redirect link and on a plain URL.
    #[test]
    fn test_clean_url() {
        let cases = [
            (
                "https://duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com&rut=abc",
                "https://example.com",
            ),
            ("https://example.com/page", "https://example.com/page"),
        ];
        for (raw, expected) in cases {
            assert_eq!(clean_url(raw), expected);
        }
    }

    /// Pins the values produced by `WebSearchConfig::default`.
    #[test]
    fn test_config_default() {
        let WebSearchConfig { timeout_secs, max_retries, enable_fallback, .. } =
            WebSearchConfig::default();
        assert_eq!(timeout_secs, 30);
        assert_eq!(max_retries, 3);
        assert!(enable_fallback);
    }
}
210
#[cfg(test)]
mod integration_tests {
    use super::*;

    /// End-to-end search against the real network; ignored by default and
    /// meant to be run explicitly (e.g. `cargo test -- --ignored`).
    #[tokio::test]
    #[ignore]
    async fn test_real_websearch_full() {
        let tool = WebSearchTool::new();
        let params = json!({
            "query": "Rust programming",
            "max_results": 5
        });

        // Print the error before panicking so the failure cause shows up in the test output.
        let result = tool.execute(params).await.unwrap_or_else(|e| {
            eprintln!("Error: {:?}", e);
            panic!("Websearch should succeed with Wikipedia fallback");
        });

        println!("Full websearch result:\n{}", result);
        assert!(!result.contains("No results found"), "Should find results via Wikipedia fallback");
    }
}
236}