// matrixcode_core/tools/websearch/mod.rs

//! Web Search Tool
//!
//! Performs web searches using multiple backends, with proxy support and a retry mechanism.
4
5mod backends;
6mod client;
7mod parser;
8
9use anyhow::Result;
10use async_trait::async_trait;
11use serde::{Deserialize, Serialize};
12use serde_json::{Value, json};
13use std::time::Duration;
14
15use super::{Tool, ToolDefinition};
16use client::{create_client, load_proxy_from_env};
17use parser::SearchResult;
18
19pub use parser::{SearchResultParser, clean_url};
20
/// Default request timeout, in seconds, used when the caller does not supply one.
const DEFAULT_TIMEOUT_SECS: u64 = 30;
/// Upper bound, in seconds, applied to any caller-supplied timeout.
const MAX_TIMEOUT_SECS: u64 = 120;
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct WebSearchConfig {
27    /// Proxy URL (e.g., "http://127.0.0.1:7890")
28    pub proxy: Option<String>,
29    /// Timeout in seconds
30    pub timeout_secs: u64,
31    /// Max retry attempts
32    pub max_retries: u32,
33    /// Enable fallback to alternative backends
34    pub enable_fallback: bool,
35}
36
37impl Default for WebSearchConfig {
38    fn default() -> Self {
39        Self {
40            proxy: None,
41            timeout_secs: DEFAULT_TIMEOUT_SECS,
42            max_retries: 3,
43            enable_fallback: true,
44        }
45    }
46}
47
48/// Web search tool with proxy support and retry mechanism
49pub struct WebSearchTool {
50    config: WebSearchConfig,
51}
52
53impl Default for WebSearchTool {
54    fn default() -> Self {
55        Self::new()
56    }
57}
58
59impl WebSearchTool {
60    pub fn new() -> Self {
61        Self {
62            config: WebSearchConfig::default(),
63        }
64    }
65
66    pub fn with_config(config: WebSearchConfig) -> Self {
67        Self { config }
68    }
69
70    /// Search with retry mechanism
71    async fn search_with_retry(
72        &self,
73        query: &str,
74        max_results: usize,
75    ) -> Result<Vec<SearchResult>> {
76        let client = create_client(self.config.proxy.as_deref(), self.config.timeout_secs)?;
77        let mut last_error: Option<anyhow::Error> = None;
78
79        for attempt in 0..self.config.max_retries {
80            if attempt > 0 {
81                let delay = Duration::from_secs(1 << (attempt - 1));
82                tokio::time::sleep(delay).await;
83                log::info!(
84                    "WebSearch retry attempt {} after {}s delay",
85                    attempt + 1,
86                    delay.as_secs()
87                );
88            }
89
90            match backends::search_duckduckgo(&client, query, max_results).await {
91                Ok(results) if !results.is_empty() => {
92                    log::info!("WebSearch succeeded on attempt {}", attempt + 1);
93                    return Ok(results);
94                }
95                Ok(_) => {
96                    log::warn!(
97                        "WebSearch returned empty results on attempt {}",
98                        attempt + 1
99                    );
100                    last_error = Some(anyhow::anyhow!("No search results found"));
101                }
102                Err(e) => {
103                    log::warn!("WebSearch failed on attempt {}: {}", attempt + 1, e);
104                    last_error = Some(e);
105                }
106            }
107        }
108
109        // Try fallback backends if enabled
110        if self.config.enable_fallback {
111            log::info!("Trying fallback search backends...");
112
113            if let Ok(results) = backends::search_wikipedia(&client, query, max_results).await
114                && !results.is_empty()
115            {
116                log::info!("Fallback search succeeded via Wikipedia");
117                return Ok(results);
118            }
119
120            if let Ok(results) = backends::search_searxng(&client, query, max_results).await
121                && !results.is_empty()
122            {
123                log::info!("Fallback search succeeded via SearXNG");
124                return Ok(results);
125            }
126        }
127
128        Err(last_error.unwrap_or_else(|| {
129            anyhow::anyhow!("WebSearch failed after {} retries", self.config.max_retries)
130        }))
131    }
132}
133
134#[async_trait]
135impl Tool for WebSearchTool {
136    fn definition(&self) -> ToolDefinition {
137        ToolDefinition {
138            name: "websearch".to_string(),
139            description: "使用 DuckDuckGo 搜索网络信息。返回包含标题、URL 和摘要的搜索结果列表。用于查找互联网上的最新信息。支持代理、自动重试和自定义超时。".to_string(),
140            parameters: json!({
141                "type": "object",
142                "properties": {
143                    "query": {
144                        "type": "string",
145                        "description": "搜索查询"
146                    },
147                    "max_results": {
148                        "type": "integer",
149                        "description": "最大返回结果数(默认 5,最大 10)"
150                    },
151                    "use_proxy": {
152                        "type": "boolean",
153                        "description": "是否使用代理(默认自动检测环境变量 HTTP_PROXY)"
154                    },
155                    "timeout_secs": {
156                        "type": "integer",
157                        "description": format!("超时时间(秒,默认 {},最大 {})", DEFAULT_TIMEOUT_SECS, MAX_TIMEOUT_SECS)
158                    }
159                },
160                "required": ["query"]
161            }),
162            ..Default::default()
163        }
164    }
165
166    async fn execute(&self, params: Value) -> Result<String> {
167        let query = params["query"]
168            .as_str()
169            .ok_or_else(|| anyhow::anyhow!("missing 'query' parameter"))?;
170        let max_results = params["max_results"].as_u64().unwrap_or(5).min(10) as usize;
171        let use_proxy = params["use_proxy"].as_bool().unwrap_or(true);
172        let timeout_secs = params["timeout_secs"]
173            .as_u64()
174            .unwrap_or(DEFAULT_TIMEOUT_SECS)
175            .min(MAX_TIMEOUT_SECS);
176
177        let mut config = self.config.clone();
178        config.timeout_secs = timeout_secs;
179        if use_proxy && config.proxy.is_none() {
180            config.proxy = load_proxy_from_env();
181            if config.proxy.is_some() {
182                log::info!("WebSearch using proxy from environment: {:?}", config.proxy);
183            }
184        }
185
186        let tool = Self::with_config(config);
187        let results = tool.search_with_retry(query, max_results).await?;
188
189        if results.is_empty() {
190            return Ok("No results found. Suggestions:\n1. Check your network connection\n2. Try enabling proxy (set HTTP_PROXY env var)\n3. Try a different query".to_string());
191        }
192
193        let output = results
194            .iter()
195            .enumerate()
196            .map(|(i, r)| {
197                let mut s = format!("{}. {}\n   {}", i + 1, r.title, r.url);
198                if let Some(ref snippet) = r.snippet {
199                    s.push_str(&format!("\n   {}", snippet));
200                }
201                s
202            })
203            .collect::<Vec<_>>()
204            .join("\n\n");
205
206        Ok(output)
207    }
208}
209
#[cfg(test)]
mod tests {
    use super::*;
    use parser::{clean_url, strip_html_tags};

    /// Checks `strip_html_tags` against a small table of inputs.
    #[test]
    fn test_strip_html_tags() {
        let cases = [
            ("<b>hello</b>", "hello"),
            ("a &amp; b", "a & b"),
            ("  <span>test</span>  ", "test"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_html_tags(input), expected);
        }
    }

    /// A DuckDuckGo redirect link is unwrapped; a plain URL is returned unchanged.
    #[test]
    fn test_clean_url() {
        let cases = [
            (
                "https://duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com&rut=abc",
                "https://example.com",
            ),
            ("https://example.com/page", "https://example.com/page"),
        ];
        for (raw, cleaned) in cases {
            assert_eq!(clean_url(raw), cleaned);
        }
    }

    /// Pins the values produced by `WebSearchConfig::default`.
    #[test]
    fn test_config_default() {
        let WebSearchConfig {
            timeout_secs,
            max_retries,
            enable_fallback,
            ..
        } = WebSearchConfig::default();
        assert_eq!(timeout_secs, 30);
        assert_eq!(max_retries, 3);
        assert!(enable_fallback);
    }
}
239
#[cfg(test)]
mod integration_tests {
    use super::*;

    /// Live end-to-end search through `WebSearchTool::execute`.
    ///
    /// Ignored by default because it talks to real search backends; run it
    /// explicitly with `cargo test -- --ignored`.
    #[tokio::test]
    #[ignore = "requires network access"]
    async fn test_real_websearch_full() {
        let tool = WebSearchTool::new();
        let params = json!({
            "query": "Rust programming",
            "max_results": 5
        });

        let result = tool.execute(params).await.unwrap_or_else(|e| {
            eprintln!("Error: {:?}", e);
            panic!("Websearch should succeed with Wikipedia fallback")
        });

        println!("Full websearch result:\n{}", result);
        assert!(
            !result.contains("No results found"),
            "Should find results via Wikipedia fallback"
        );
    }
}