reasonkit/web/
mod.rs

1//! Web Search Integration for ReasonKit Web
2//!
3//! Provides web search capabilities for deep research:
4//! - DuckDuckGo (free, no API key)
5//! - Tavily (optional, better quality with API key)
6//! - Serper (optional, Google search)
7//!
8//! ## Usage
9//!
10//! ```rust,ignore
11//! use reasonkit::web::{WebSearcher, SearchConfig, SearchProvider};
12//!
13//! let config = SearchConfig::default();
14//! let searcher = WebSearcher::new(config);
15//! let results = searcher.search("rust async programming").await?;
16//! ```
17
18use crate::error::{Error, Result};
19use serde::{Deserialize, Serialize};
20use std::time::Duration;
21
/// Web search configuration consumed by [`WebSearcher`].
///
/// Serializable with serde; `Default` selects DuckDuckGo and reads the optional
/// API keys from the environment.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchConfig {
    /// Primary search provider (`Auto` picks one based on which API keys are set)
    pub provider: SearchProvider,
    /// Number of results to request from the provider
    pub num_results: usize,
    /// HTTP request timeout, in seconds
    pub timeout_secs: u64,
    /// Tavily API key (optional; required only when Tavily is used)
    pub tavily_api_key: Option<String>,
    /// Serper API key (optional; required only when Serper is used)
    pub serper_api_key: Option<String>,
}
36
37impl Default for SearchConfig {
38    fn default() -> Self {
39        Self {
40            provider: SearchProvider::DuckDuckGo,
41            num_results: 5,
42            timeout_secs: 30,
43            tavily_api_key: std::env::var("TAVILY_API_KEY").ok(),
44            serper_api_key: std::env::var("SERPER_API_KEY").ok(),
45        }
46    }
47}
48
/// Search provider options.
///
/// Serialized in lowercase (`"duckduckgo"`, `"tavily"`, `"serper"`, `"auto"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum SearchProvider {
    /// DuckDuckGo (free, no API key); this is the default variant
    #[default]
    DuckDuckGo,
    /// Tavily (requires API key, better quality)
    Tavily,
    /// Serper (requires API key, Google search)
    Serper,
    /// Auto-select the best available provider based on configured API keys
    Auto,
}
63
/// A single search result, normalized across providers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Result title
    pub title: String,
    /// Result URL (may be empty for provider-generated answers that have no link)
    pub url: String,
    /// Result snippet/description
    pub snippet: String,
    /// Provider that produced this result
    pub source: SearchProvider,
}
76
/// Web search client.
///
/// Holds the search configuration together with the HTTP client used for all
/// provider requests.
pub struct WebSearcher {
    // Provider choice, result count and API keys.
    config: SearchConfig,
    // HTTP client built in `WebSearcher::new`.
    client: reqwest::Client,
}
82
83impl WebSearcher {
84    /// Create a new web searcher
85    pub fn new(config: SearchConfig) -> Self {
86        let client = reqwest::Client::builder()
87            .timeout(Duration::from_secs(config.timeout_secs))
88            .user_agent("ReasonKit/0.1 (https://reasonkit.sh)")
89            .build()
90            .unwrap_or_default();
91
92        Self { config, client }
93    }
94
95    /// Search using the configured provider
96    pub async fn search(&self, query: &str) -> Result<Vec<SearchResult>> {
97        let provider = match self.config.provider {
98            SearchProvider::Auto => self.auto_select_provider(),
99            p => p,
100        };
101
102        match provider {
103            SearchProvider::DuckDuckGo => self.search_duckduckgo(query).await,
104            SearchProvider::Tavily => self.search_tavily(query).await,
105            SearchProvider::Serper => self.search_serper(query).await,
106            SearchProvider::Auto => self.search_duckduckgo(query).await,
107        }
108    }
109
110    /// Auto-select best available provider
111    fn auto_select_provider(&self) -> SearchProvider {
112        if self.config.tavily_api_key.is_some() {
113            SearchProvider::Tavily
114        } else if self.config.serper_api_key.is_some() {
115            SearchProvider::Serper
116        } else {
117            SearchProvider::DuckDuckGo
118        }
119    }
120
121    /// Search using DuckDuckGo HTML API (free, no key)
122    async fn search_duckduckgo(&self, query: &str) -> Result<Vec<SearchResult>> {
123        // DuckDuckGo Instant Answer API (JSON)
124        let url = format!(
125            "https://api.duckduckgo.com/?q={}&format=json&no_html=1&skip_disambig=1",
126            urlencoding::encode(query)
127        );
128
129        let response = self
130            .client
131            .get(&url)
132            .send()
133            .await
134            .map_err(|e| Error::Network(format!("DuckDuckGo request failed: {}", e)))?;
135
136        let data: DuckDuckGoResponse = response
137            .json()
138            .await
139            .map_err(|e| Error::Network(format!("DuckDuckGo parse failed: {}", e)))?;
140
141        let mut results = Vec::new();
142
143        // Add abstract if present
144        if !data.abstract_text.is_empty() {
145            results.push(SearchResult {
146                title: data.heading.clone(),
147                url: data.abstract_url.clone(),
148                snippet: data.abstract_text.clone(),
149                source: SearchProvider::DuckDuckGo,
150            });
151        }
152
153        // Add related topics
154        for topic in data.related_topics.iter().take(self.config.num_results) {
155            if let (Some(text), Some(first_url)) = (&topic.text, &topic.first_url) {
156                results.push(SearchResult {
157                    title: text.chars().take(100).collect(),
158                    url: first_url.clone(),
159                    snippet: text.clone(),
160                    source: SearchProvider::DuckDuckGo,
161                });
162            }
163        }
164
165        // If no results from instant answer, try DuckDuckGo lite
166        if results.is_empty() {
167            results = self.search_duckduckgo_lite(query).await?;
168        }
169
170        Ok(results)
171    }
172
173    /// Fallback DuckDuckGo HTML scraping
174    async fn search_duckduckgo_lite(&self, query: &str) -> Result<Vec<SearchResult>> {
175        let url = format!(
176            "https://lite.duckduckgo.com/lite/?q={}",
177            urlencoding::encode(query)
178        );
179
180        let response = self
181            .client
182            .get(&url)
183            .send()
184            .await
185            .map_err(|e| Error::Network(format!("DuckDuckGo lite request failed: {}", e)))?;
186
187        let html = response
188            .text()
189            .await
190            .map_err(|e| Error::Network(format!("DuckDuckGo lite read failed: {}", e)))?;
191
192        // Simple HTML parsing for DDG Lite results
193        let mut results = Vec::new();
194        let document = scraper::Html::parse_document(&html);
195        let result_selector = scraper::Selector::parse("a.result-link")
196            .unwrap_or_else(|_| scraper::Selector::parse("a").unwrap());
197
198        for (i, element) in document.select(&result_selector).enumerate() {
199            if i >= self.config.num_results {
200                break;
201            }
202
203            if let Some(href) = element.value().attr("href") {
204                if href.starts_with("http") {
205                    let title = element.text().collect::<String>();
206                    results.push(SearchResult {
207                        title: title.clone(),
208                        url: href.to_string(),
209                        snippet: title,
210                        source: SearchProvider::DuckDuckGo,
211                    });
212                }
213            }
214        }
215
216        Ok(results)
217    }
218
219    /// Search using Tavily API (requires API key)
220    async fn search_tavily(&self, query: &str) -> Result<Vec<SearchResult>> {
221        let api_key = self
222            .config
223            .tavily_api_key
224            .as_ref()
225            .ok_or_else(|| Error::Config("TAVILY_API_KEY not set".to_string()))?;
226
227        let request = TavilyRequest {
228            api_key: api_key.clone(),
229            query: query.to_string(),
230            search_depth: "advanced".to_string(),
231            max_results: self.config.num_results,
232            include_answer: true,
233        };
234
235        let response = self
236            .client
237            .post("https://api.tavily.com/search")
238            .json(&request)
239            .send()
240            .await
241            .map_err(|e| Error::Network(format!("Tavily request failed: {}", e)))?;
242
243        if !response.status().is_success() {
244            return Err(Error::Network(format!(
245                "Tavily API error: {}",
246                response.status()
247            )));
248        }
249
250        let data: TavilyResponse = response
251            .json()
252            .await
253            .map_err(|e| Error::Network(format!("Tavily parse failed: {}", e)))?;
254
255        let mut results = Vec::new();
256
257        // Add Tavily's answer if present
258        if let Some(answer) = data.answer {
259            results.push(SearchResult {
260                title: "Tavily AI Answer".to_string(),
261                url: String::new(),
262                snippet: answer,
263                source: SearchProvider::Tavily,
264            });
265        }
266
267        // Add search results
268        for result in data.results {
269            results.push(SearchResult {
270                title: result.title,
271                url: result.url,
272                snippet: result.content,
273                source: SearchProvider::Tavily,
274            });
275        }
276
277        Ok(results)
278    }
279
280    /// Search using Serper API (requires API key)
281    async fn search_serper(&self, query: &str) -> Result<Vec<SearchResult>> {
282        let api_key = self
283            .config
284            .serper_api_key
285            .as_ref()
286            .ok_or_else(|| Error::Config("SERPER_API_KEY not set".to_string()))?;
287
288        let request = serde_json::json!({
289            "q": query,
290            "num": self.config.num_results
291        });
292
293        let response = self
294            .client
295            .post("https://google.serper.dev/search")
296            .header("X-API-KEY", api_key)
297            .json(&request)
298            .send()
299            .await
300            .map_err(|e| Error::Network(format!("Serper request failed: {}", e)))?;
301
302        if !response.status().is_success() {
303            return Err(Error::Network(format!(
304                "Serper API error: {}",
305                response.status()
306            )));
307        }
308
309        let data: SerperResponse = response
310            .json()
311            .await
312            .map_err(|e| Error::Network(format!("Serper parse failed: {}", e)))?;
313
314        let mut results = Vec::new();
315
316        // Add answer box if present
317        if let Some(answer_box) = data.answer_box {
318            results.push(SearchResult {
319                title: answer_box.title.unwrap_or_else(|| "Answer".to_string()),
320                url: answer_box.link.unwrap_or_default(),
321                snippet: answer_box
322                    .answer
323                    .unwrap_or_else(|| answer_box.snippet.unwrap_or_default()),
324                source: SearchProvider::Serper,
325            });
326        }
327
328        // Add organic results
329        for result in data.organic.unwrap_or_default() {
330            results.push(SearchResult {
331                title: result.title,
332                url: result.link,
333                snippet: result.snippet.unwrap_or_default(),
334                source: SearchProvider::Serper,
335            });
336        }
337
338        Ok(results)
339    }
340}
341
342// ═══════════════════════════════════════════════════════════════════════════
343// API RESPONSE TYPES
344// ═══════════════════════════════════════════════════════════════════════════
345
346/// DuckDuckGo Instant Answer API response
347#[derive(Debug, Deserialize)]
348#[serde(rename_all = "PascalCase")]
349struct DuckDuckGoResponse {
350    #[serde(default)]
351    abstract_text: String,
352    #[serde(default)]
353    abstract_url: String,
354    #[serde(default)]
355    heading: String,
356    #[serde(default)]
357    related_topics: Vec<DuckDuckGoTopic>,
358}
359
360#[derive(Debug, Deserialize)]
361#[serde(rename_all = "PascalCase")]
362struct DuckDuckGoTopic {
363    text: Option<String>,
364    first_url: Option<String>,
365}
366
/// Tavily API request body, sent as JSON to the Tavily search endpoint.
#[derive(Debug, Serialize)]
struct TavilyRequest {
    // API key, sent in the request body.
    api_key: String,
    // The user's search query.
    query: String,
    // Search depth; this module always sends "advanced".
    search_depth: String,
    // Upper bound on returned results (taken from `SearchConfig::num_results`).
    max_results: usize,
    // Whether to ask Tavily for a generated answer; this module always sends `true`.
    include_answer: bool,
}
376
/// Tavily API response (only the fields this module reads).
#[derive(Debug, Deserialize)]
struct TavilyResponse {
    // Generated answer, if the response includes one.
    answer: Option<String>,
    // Search hits; defaults to an empty list when the key is absent.
    #[serde(default)]
    results: Vec<TavilyResult>,
}
384
/// A single hit in a Tavily response. All three fields are required, so a hit
/// missing any of them makes deserialization of the response fail.
#[derive(Debug, Deserialize)]
struct TavilyResult {
    // Page title.
    title: String,
    // Page URL.
    url: String,
    // Content text; surfaced as the result snippet.
    content: String,
}
391
/// Serper API response (only the fields this module reads).
///
/// Keys are camelCase on the wire (e.g. `answerBox`).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct SerperResponse {
    // Answer box, if the response includes one.
    answer_box: Option<SerperAnswerBox>,
    // Organic search results; absent is treated as empty by the caller.
    organic: Option<Vec<SerperOrganic>>,
}
399
/// Serper answer box. Every field is optional; the caller substitutes defaults
/// for whatever is missing.
#[derive(Debug, Deserialize)]
struct SerperAnswerBox {
    // Title of the answer box; the caller falls back to "Answer".
    title: Option<String>,
    // Direct answer text; preferred over `snippet` when both are present.
    answer: Option<String>,
    // Snippet text, used when there is no direct answer.
    snippet: Option<String>,
    // Source link; the caller falls back to an empty string.
    link: Option<String>,
}
407
/// A single organic result in a Serper response. `title` and `link` are
/// required; `snippet` may be absent.
#[derive(Debug, Deserialize)]
struct SerperOrganic {
    // Result title.
    title: String,
    // Result URL.
    link: String,
    // Result description; the caller falls back to an empty string.
    snippet: Option<String>,
}
414
415// ═══════════════════════════════════════════════════════════════════════════
416// URL ENCODING HELPER
417// ═══════════════════════════════════════════════════════════════════════════
418
mod urlencoding {
    /// Uppercase hex digits used for `%XX` escapes.
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    /// Encode `s` for use as a URL query-string value.
    ///
    /// ASCII letters, digits and `-`, `_`, `.`, `~` pass through unchanged, a
    /// space becomes `+`, and every other byte of the UTF-8 representation is
    /// written as `%XX`.
    ///
    /// Encoding works on bytes rather than chars: a non-ASCII character must be
    /// escaped as its UTF-8 byte sequence (`é` -> `%C3%A9`). Formatting the
    /// code point instead would yield Latin-1 style or over-long escapes such
    /// as `%4E2D`, which servers cannot decode.
    pub fn encode(s: &str) -> String {
        let mut encoded = String::with_capacity(s.len());
        for byte in s.bytes() {
            match byte {
                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                    encoded.push(char::from(byte));
                }
                b' ' => encoded.push('+'),
                _ => {
                    encoded.push('%');
                    encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
                    encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
                }
            }
        }
        encoded
    }
}
430
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration targets DuckDuckGo and asks for five results.
    #[test]
    fn test_config_default() {
        let config = SearchConfig::default();
        assert_eq!(config.provider, SearchProvider::DuckDuckGo);
        assert_eq!(config.num_results, 5);
    }

    /// Spaces become `+`, and a literal `+` is percent-escaped so the two stay
    /// distinguishable.
    #[test]
    fn test_url_encoding() {
        assert_eq!(urlencoding::encode("hello world"), "hello+world");
        assert_eq!(urlencoding::encode("rust+async"), "rust%2Basync");
    }

    /// Live DuckDuckGo search; needs network access.
    #[tokio::test]
    async fn test_duckduckgo_search() {
        let config = SearchConfig {
            provider: SearchProvider::DuckDuckGo,
            num_results: 3,
            timeout_secs: 10,
            ..Default::default()
        };
        let searcher = WebSearcher::new(config);

        // The network may be unavailable in CI, so an error is tolerated. When
        // the search does succeed, every hit must be attributed to DuckDuckGo
        // (the previous `is_ok() || is_err()` assertion could never fail).
        if let Ok(results) = searcher.search("rust programming language").await {
            assert!(results
                .iter()
                .all(|result| result.source == SearchProvider::DuckDuckGo));
        }
    }
}
463}