Skip to main content

matrixcode_core/tools/websearch/
mod.rs

1//! Web Search Tool
2//!
3//! Performs web searches using multiple backends with proxy support and retry mechanism.
4
5mod backends;
6mod client;
7mod parser;
8
9use anyhow::Result;
10use async_trait::async_trait;
11use serde::{Deserialize, Serialize};
12use serde_json::{Value, json};
13use std::time::Duration;
14
15use super::{Tool, ToolDefinition};
16use client::{create_client, load_proxy_from_env};
17use parser::SearchResult;
18
19pub use parser::{SearchResultParser, clean_url};
20
21/// Web search configuration
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct WebSearchConfig {
24    /// Proxy URL (e.g., "http://127.0.0.1:7890")
25    pub proxy: Option<String>,
26    /// Timeout in seconds
27    pub timeout_secs: u64,
28    /// Max retry attempts
29    pub max_retries: u32,
30    /// Enable fallback to alternative backends
31    pub enable_fallback: bool,
32}
33
34impl Default for WebSearchConfig {
35    fn default() -> Self {
36        Self {
37            proxy: None,
38            timeout_secs: 30,
39            max_retries: 3,
40            enable_fallback: true,
41        }
42    }
43}
44
45/// Web search tool with proxy support and retry mechanism
46pub struct WebSearchTool {
47    config: WebSearchConfig,
48}
49
50impl Default for WebSearchTool {
51    fn default() -> Self {
52        Self::new()
53    }
54}
55
56impl WebSearchTool {
57    pub fn new() -> Self {
58        Self { config: WebSearchConfig::default() }
59    }
60
61    pub fn with_config(config: WebSearchConfig) -> Self {
62        Self { config }
63    }
64
65    /// Search with retry mechanism
66    async fn search_with_retry(&self, query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
67        let client = create_client(self.config.proxy.as_deref(), self.config.timeout_secs)?;
68        let mut last_error: Option<anyhow::Error> = None;
69
70        for attempt in 0..self.config.max_retries {
71            if attempt > 0 {
72                let delay = Duration::from_secs(1 << (attempt - 1));
73                tokio::time::sleep(delay).await;
74                log::info!("WebSearch retry attempt {} after {}s delay", attempt + 1, delay.as_secs());
75            }
76
77            match backends::search_duckduckgo(&client, query, max_results).await {
78                Ok(results) if !results.is_empty() => {
79                    log::info!("WebSearch succeeded on attempt {}", attempt + 1);
80                    return Ok(results);
81                }
82                Ok(_) => {
83                    log::warn!("WebSearch returned empty results on attempt {}", attempt + 1);
84                    last_error = Some(anyhow::anyhow!("No search results found"));
85                }
86                Err(e) => {
87                    log::warn!("WebSearch failed on attempt {}: {}", attempt + 1, e);
88                    last_error = Some(e);
89                }
90            }
91        }
92
93        // Try fallback backends if enabled
94        if self.config.enable_fallback {
95            log::info!("Trying fallback search backends...");
96
97            if let Ok(results) = backends::search_wikipedia(&client, query, max_results).await
98                && !results.is_empty() {
99                    log::info!("Fallback search succeeded via Wikipedia");
100                    return Ok(results);
101                }
102
103            if let Ok(results) = backends::search_searxng(&client, query, max_results).await
104                && !results.is_empty() {
105                    log::info!("Fallback search succeeded via SearXNG");
106                    return Ok(results);
107                }
108        }
109
110        Err(last_error
111            .unwrap_or_else(|| anyhow::anyhow!("WebSearch failed after {} retries", self.config.max_retries)))
112    }
113}
114
115#[async_trait]
116impl Tool for WebSearchTool {
117    fn definition(&self) -> ToolDefinition {
118        ToolDefinition {
119            name: "websearch".to_string(),
120            description: "使用 DuckDuckGo 搜索网络信息。返回包含标题、URL 和摘要的搜索结果列表。用于查找互联网上的最新信息。支持代理和自动重试。".to_string(),
121            parameters: json!({
122                "type": "object",
123                "properties": {
124                    "query": {
125                        "type": "string",
126                        "description": "搜索查询"
127                    },
128                    "max_results": {
129                        "type": "integer",
130                        "description": "最大返回结果数(默认 5,最大 10)"
131                    },
132                    "use_proxy": {
133                        "type": "boolean",
134                        "description": "是否使用代理(默认自动检测环境变量 HTTP_PROXY)"
135                    }
136                },
137                "required": ["query"]
138            }),
139            ..Default::default()
140        }
141    }
142
143    async fn execute(&self, params: Value) -> Result<String> {
144        let query = params["query"]
145            .as_str()
146            .ok_or_else(|| anyhow::anyhow!("missing 'query' parameter"))?;
147        let max_results = params["max_results"].as_u64().unwrap_or(5).min(10) as usize;
148        let use_proxy = params["use_proxy"].as_bool().unwrap_or(true);
149
150        let mut config = self.config.clone();
151        if use_proxy && config.proxy.is_none() {
152            config.proxy = load_proxy_from_env();
153            if config.proxy.is_some() {
154                log::info!("WebSearch using proxy from environment: {:?}", config.proxy);
155            }
156        }
157
158        let tool = Self::with_config(config);
159        let results = tool.search_with_retry(query, max_results).await?;
160
161        if results.is_empty() {
162            return Ok("No results found. Suggestions:\n1. Check your network connection\n2. Try enabling proxy (set HTTP_PROXY env var)\n3. Try a different query".to_string());
163        }
164
165        let output = results.iter()
166            .enumerate()
167            .map(|(i, r)| {
168                let mut s = format!("{}. {}\n   {}", i + 1, r.title, r.url);
169                if let Some(ref snippet) = r.snippet {
170                    s.push_str(&format!("\n   {}", snippet));
171                }
172                s
173            })
174            .collect::<Vec<_>>()
175            .join("\n\n");
176
177        Ok(output)
178    }
179}
180
#[cfg(test)]
mod tests {
    use super::parser::strip_html_tags;
    use super::*;

    /// Tags are removed, entities are decoded, and surrounding whitespace is
    /// trimmed.
    #[test]
    fn test_strip_html_tags() {
        let cases = [
            ("<b>hello</b>", "hello"),
            ("a &amp; b", "a & b"),
            ("  <span>test</span>  ", "test"),
        ];

        for (input, expected) in cases {
            assert_eq!(strip_html_tags(input), expected);
        }
    }

    /// DuckDuckGo redirect links are unwrapped to their target, while plain
    /// URLs pass through unchanged.
    #[test]
    fn test_clean_url() {
        let cases = [
            (
                "https://duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com&rut=abc",
                "https://example.com",
            ),
            ("https://example.com/page", "https://example.com/page"),
        ];

        for (input, expected) in cases {
            assert_eq!(clean_url(input), expected);
        }
    }

    /// The default configuration uses a 30s timeout, three attempts, and has
    /// fallback backends enabled.
    #[test]
    fn test_config_default() {
        let WebSearchConfig {
            timeout_secs,
            max_retries,
            enable_fallback,
            ..
        } = WebSearchConfig::default();

        assert_eq!(timeout_secs, 30);
        assert_eq!(max_retries, 3);
        assert!(enable_fallback);
    }
}
210
#[cfg(test)]
mod integration_tests {
    use super::*;

    /// End-to-end search against the real network.
    ///
    /// Marked `#[ignore]` so it only runs when requested explicitly
    /// (`cargo test -- --ignored`), since it depends on outside services.
    #[tokio::test]
    #[ignore]
    async fn test_real_websearch_full() {
        let tool = WebSearchTool::new();
        let params = json!({
            "query": "Rust programming",
            "max_results": 5
        });

        let result = tool.execute(params).await.unwrap_or_else(|e| {
            eprintln!("Error: {:?}", e);
            panic!("Websearch should succeed with Wikipedia fallback");
        });

        println!("Full websearch result:\n{}", result);
        assert!(!result.contains("No results found"), "Should find results via Wikipedia fallback");
    }
}
236}