Skip to main content

matrixcode_core/tools/websearch/
mod.rs

//! Web Search Tool
//!
//! Performs web searches using multiple backends, with proxy support and a retry mechanism.
4
5mod backends;
6mod client;
7mod parser;
8
9use anyhow::Result;
10use async_trait::async_trait;
11use serde::{Deserialize, Serialize};
12use serde_json::{Value, json};
13use std::time::Duration;
14
15use super::{Tool, ToolDefinition};
16use client::{create_client, load_proxy_from_env};
17use parser::SearchResult;
18
19pub use parser::{SearchResultParser, clean_url};
20
21/// Web search configuration
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct WebSearchConfig {
24    /// Proxy URL (e.g., "http://127.0.0.1:7890")
25    pub proxy: Option<String>,
26    /// Timeout in seconds
27    pub timeout_secs: u64,
28    /// Max retry attempts
29    pub max_retries: u32,
30    /// Enable fallback to alternative backends
31    pub enable_fallback: bool,
32}
33
34impl Default for WebSearchConfig {
35    fn default() -> Self {
36        Self {
37            proxy: None,
38            timeout_secs: 30,
39            max_retries: 3,
40            enable_fallback: true,
41        }
42    }
43}
44
45/// Web search tool with proxy support and retry mechanism
46pub struct WebSearchTool {
47    config: WebSearchConfig,
48}
49
50impl Default for WebSearchTool {
51    fn default() -> Self {
52        Self::new()
53    }
54}
55
56impl WebSearchTool {
57    pub fn new() -> Self {
58        Self {
59            config: WebSearchConfig::default(),
60        }
61    }
62
63    pub fn with_config(config: WebSearchConfig) -> Self {
64        Self { config }
65    }
66
67    /// Search with retry mechanism
68    async fn search_with_retry(
69        &self,
70        query: &str,
71        max_results: usize,
72    ) -> Result<Vec<SearchResult>> {
73        let client = create_client(self.config.proxy.as_deref(), self.config.timeout_secs)?;
74        let mut last_error: Option<anyhow::Error> = None;
75
76        for attempt in 0..self.config.max_retries {
77            if attempt > 0 {
78                let delay = Duration::from_secs(1 << (attempt - 1));
79                tokio::time::sleep(delay).await;
80                log::info!(
81                    "WebSearch retry attempt {} after {}s delay",
82                    attempt + 1,
83                    delay.as_secs()
84                );
85            }
86
87            match backends::search_duckduckgo(&client, query, max_results).await {
88                Ok(results) if !results.is_empty() => {
89                    log::info!("WebSearch succeeded on attempt {}", attempt + 1);
90                    return Ok(results);
91                }
92                Ok(_) => {
93                    log::warn!(
94                        "WebSearch returned empty results on attempt {}",
95                        attempt + 1
96                    );
97                    last_error = Some(anyhow::anyhow!("No search results found"));
98                }
99                Err(e) => {
100                    log::warn!("WebSearch failed on attempt {}: {}", attempt + 1, e);
101                    last_error = Some(e);
102                }
103            }
104        }
105
106        // Try fallback backends if enabled
107        if self.config.enable_fallback {
108            log::info!("Trying fallback search backends...");
109
110            if let Ok(results) = backends::search_wikipedia(&client, query, max_results).await
111                && !results.is_empty()
112            {
113                log::info!("Fallback search succeeded via Wikipedia");
114                return Ok(results);
115            }
116
117            if let Ok(results) = backends::search_searxng(&client, query, max_results).await
118                && !results.is_empty()
119            {
120                log::info!("Fallback search succeeded via SearXNG");
121                return Ok(results);
122            }
123        }
124
125        Err(last_error.unwrap_or_else(|| {
126            anyhow::anyhow!("WebSearch failed after {} retries", self.config.max_retries)
127        }))
128    }
129}
130
131#[async_trait]
132impl Tool for WebSearchTool {
133    fn definition(&self) -> ToolDefinition {
134        ToolDefinition {
135            name: "websearch".to_string(),
136            description: "使用 DuckDuckGo 搜索网络信息。返回包含标题、URL 和摘要的搜索结果列表。用于查找互联网上的最新信息。支持代理和自动重试。".to_string(),
137            parameters: json!({
138                "type": "object",
139                "properties": {
140                    "query": {
141                        "type": "string",
142                        "description": "搜索查询"
143                    },
144                    "max_results": {
145                        "type": "integer",
146                        "description": "最大返回结果数(默认 5,最大 10)"
147                    },
148                    "use_proxy": {
149                        "type": "boolean",
150                        "description": "是否使用代理(默认自动检测环境变量 HTTP_PROXY)"
151                    }
152                },
153                "required": ["query"]
154            }),
155            ..Default::default()
156        }
157    }
158
159    async fn execute(&self, params: Value) -> Result<String> {
160        let query = params["query"]
161            .as_str()
162            .ok_or_else(|| anyhow::anyhow!("missing 'query' parameter"))?;
163        let max_results = params["max_results"].as_u64().unwrap_or(5).min(10) as usize;
164        let use_proxy = params["use_proxy"].as_bool().unwrap_or(true);
165
166        let mut config = self.config.clone();
167        if use_proxy && config.proxy.is_none() {
168            config.proxy = load_proxy_from_env();
169            if config.proxy.is_some() {
170                log::info!("WebSearch using proxy from environment: {:?}", config.proxy);
171            }
172        }
173
174        let tool = Self::with_config(config);
175        let results = tool.search_with_retry(query, max_results).await?;
176
177        if results.is_empty() {
178            return Ok("No results found. Suggestions:\n1. Check your network connection\n2. Try enabling proxy (set HTTP_PROXY env var)\n3. Try a different query".to_string());
179        }
180
181        let output = results
182            .iter()
183            .enumerate()
184            .map(|(i, r)| {
185                let mut s = format!("{}. {}\n   {}", i + 1, r.title, r.url);
186                if let Some(ref snippet) = r.snippet {
187                    s.push_str(&format!("\n   {}", snippet));
188                }
189                s
190            })
191            .collect::<Vec<_>>()
192            .join("\n\n");
193
194        Ok(output)
195    }
196}
197
#[cfg(test)]
mod tests {
    use super::*;
    use parser::{clean_url, strip_html_tags};

    /// Tags are removed, `&amp;` is decoded, and surrounding whitespace is trimmed.
    #[test]
    fn test_strip_html_tags() {
        let cases = [
            ("<b>hello</b>", "hello"),
            ("a &amp; b", "a & b"),
            ("  <span>test</span>  ", "test"),
        ];

        for (input, expected) in cases {
            assert_eq!(strip_html_tags(input), expected);
        }
    }

    /// DuckDuckGo redirect links are unwrapped; ordinary URLs pass through unchanged.
    #[test]
    fn test_clean_url() {
        let cases = [
            (
                "https://duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com&rut=abc",
                "https://example.com",
            ),
            ("https://example.com/page", "https://example.com/page"),
        ];

        for (input, expected) in cases {
            assert_eq!(clean_url(input), expected);
        }
    }

    /// The default configuration uses a 30s timeout, 3 attempts, and fallback enabled.
    #[test]
    fn test_config_default() {
        let WebSearchConfig {
            timeout_secs,
            max_retries,
            enable_fallback,
            ..
        } = WebSearchConfig::default();

        assert_eq!(timeout_secs, 30);
        assert_eq!(max_retries, 3);
        assert!(enable_fallback);
    }
}
227
#[cfg(test)]
mod integration_tests {
    use super::*;

    /// End-to-end search through `WebSearchTool::execute`.
    ///
    /// Marked `#[ignore]`, so it only runs when ignored tests are requested
    /// (e.g. `cargo test -- --ignored`).
    #[tokio::test]
    #[ignore]
    async fn test_real_websearch_full() {
        let tool = WebSearchTool::new();
        let params = json!({
            "query": "Rust programming",
            "max_results": 5
        });

        // Print the error details before failing so the cause appears in test output.
        let result = tool.execute(params).await.unwrap_or_else(|e| {
            eprintln!("Error: {:?}", e);
            panic!("Websearch should succeed with Wikipedia fallback");
        });

        println!("Full websearch result:\n{}", result);
        assert!(
            !result.contains("No results found"),
            "Should find results via Wikipedia fallback"
        );
    }
}
256}