use std::sync::Arc;
use async_trait::async_trait;
use scraper::{Html, Selector};
use crate::fetcher::PageFetcher;
use crate::{
Engine, EngineCategory, EngineConfig, HttpFetcher, Result, SearchError, SearchQuery,
SearchResult,
};
pub struct DuckDuckGo {
config: EngineConfig,
fetcher: Arc<dyn PageFetcher>,
}
impl DuckDuckGo {
pub fn new() -> Self {
Self::with_fetcher(Arc::new(HttpFetcher::new()))
}
pub fn with_fetcher(fetcher: Arc<dyn PageFetcher>) -> Self {
Self {
config: EngineConfig {
name: "DuckDuckGo".to_string(),
shortcut: "ddg".to_string(),
categories: vec![EngineCategory::General],
weight: 1.0,
timeout: 5,
enabled: true,
paging: true,
safesearch: true,
},
fetcher,
}
}
pub fn with_config(mut self, config: EngineConfig) -> Self {
self.config = config;
self
}
}
impl Default for DuckDuckGo {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Engine for DuckDuckGo {
fn config(&self) -> &EngineConfig {
&self.config
}
async fn search(&self, query: &SearchQuery) -> Result<Vec<SearchResult>> {
let url = format!(
"https://html.duckduckgo.com/html/?q={}",
urlencoding::encode(&query.query)
);
let html = self.fetcher.fetch(&url).await?;
self.parse_results(&html)
}
}
impl DuckDuckGo {
fn parse_results(&self, html: &str) -> Result<Vec<SearchResult>> {
let document = Html::parse_document(html);
let result_selector = Selector::parse(".result")
.map_err(|e| SearchError::Parse(format!("Failed to parse selector: {:?}", e)))?;
let title_selector = Selector::parse(".result__title a")
.map_err(|e| SearchError::Parse(format!("Failed to parse selector: {:?}", e)))?;
let snippet_selector = Selector::parse(".result__snippet")
.map_err(|e| SearchError::Parse(format!("Failed to parse selector: {:?}", e)))?;
let mut results = Vec::new();
for element in document.select(&result_selector) {
let title_elem = element.select(&title_selector).next();
let snippet_elem = element.select(&snippet_selector).next();
if let Some(title_elem) = title_elem {
let title = title_elem.text().collect::<String>().trim().to_string();
let url = title_elem.value().attr("href").unwrap_or_default();
let url = if url.starts_with("//duckduckgo.com/l/") {
extract_redirect_url(url).unwrap_or_else(|| url.to_string())
} else {
url.to_string()
};
let content = snippet_elem
.map(|e| e.text().collect::<String>().trim().to_string())
.unwrap_or_default();
if !url.is_empty() && !title.is_empty() {
results.push(SearchResult::new(url, title, content));
}
}
}
Ok(results)
}
}
fn extract_redirect_url(url: &str) -> Option<String> {
let url = url.trim_start_matches("//duckduckgo.com/l/?uddg=");
let decoded = urlencoding::decode(url).ok()?;
let end = decoded.find('&').unwrap_or(decoded.len());
Some(decoded[..end].to_string())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::HttpFetcher;
#[test]
fn test_duckduckgo_new() {
let engine = DuckDuckGo::new();
assert_eq!(engine.config.name, "DuckDuckGo");
assert_eq!(engine.config.shortcut, "ddg");
assert_eq!(engine.config.categories, vec![EngineCategory::General]);
assert_eq!(engine.config.weight, 1.0);
assert_eq!(engine.config.timeout, 5);
assert!(engine.config.enabled);
assert!(engine.config.paging);
assert!(engine.config.safesearch);
}
#[test]
fn test_duckduckgo_with_fetcher() {
let fetcher: Arc<dyn PageFetcher> = Arc::new(HttpFetcher::new());
let engine = DuckDuckGo::with_fetcher(fetcher);
assert_eq!(engine.name(), "DuckDuckGo");
}
#[test]
fn test_duckduckgo_default() {
let engine = DuckDuckGo::default();
assert_eq!(engine.name(), "DuckDuckGo");
}
#[test]
fn test_duckduckgo_with_config() {
let custom_config = EngineConfig {
name: "Custom DDG".to_string(),
shortcut: "cddg".to_string(),
weight: 2.0,
..Default::default()
};
let engine = DuckDuckGo::new().with_config(custom_config);
assert_eq!(engine.name(), "Custom DDG");
assert_eq!(engine.shortcut(), "cddg");
assert_eq!(engine.weight(), 2.0);
}
#[test]
fn test_duckduckgo_engine_trait() {
let engine = DuckDuckGo::new();
assert_eq!(engine.name(), "DuckDuckGo");
assert_eq!(engine.shortcut(), "ddg");
assert_eq!(engine.weight(), 1.0);
assert!(engine.is_enabled());
}
#[test]
fn test_extract_redirect_url() {
let url = "//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpage&rut=abc";
let result = extract_redirect_url(url);
assert_eq!(result, Some("https://example.com/page".to_string()));
}
#[test]
fn test_extract_redirect_url_no_params() {
let url = "//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com";
let result = extract_redirect_url(url);
assert_eq!(result, Some("https://example.com".to_string()));
}
#[test]
fn test_parse_results_empty_html() {
let engine = DuckDuckGo::new();
let results = engine.parse_results("<html><body></body></html>").unwrap();
assert!(results.is_empty());
}
#[test]
fn test_parse_results_with_results() {
let engine = DuckDuckGo::new();
let html = r#"
<html>
<body>
<div class="result">
<h2 class="result__title"><a href="https://example.com">Example Title</a></h2>
<div class="result__snippet">Example snippet text</div>
</div>
</body>
</html>
"#;
let results = engine.parse_results(html).unwrap();
assert_eq!(results.len(), 1);
assert_eq!(results[0].title, "Example Title");
assert_eq!(results[0].url, "https://example.com");
assert_eq!(results[0].content, "Example snippet text");
}
#[test]
fn test_parse_results_with_redirect_url() {
let engine = DuckDuckGo::new();
let html = r#"
<html>
<body>
<div class="result">
<h2 class="result__title"><a href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpage&rut=abc">Redirected Result</a></h2>
<div class="result__snippet">Snippet for redirected result</div>
</div>
</body>
</html>
"#;
let results = engine.parse_results(html).unwrap();
assert_eq!(results.len(), 1);
assert_eq!(results[0].url, "https://example.com/page");
assert_eq!(results[0].title, "Redirected Result");
assert_eq!(results[0].content, "Snippet for redirected result");
}
#[test]
fn test_parse_results_multiple() {
let engine = DuckDuckGo::new();
let html = r#"
<html>
<body>
<div class="result">
<h2 class="result__title"><a href="https://first.com">First</a></h2>
<div class="result__snippet">First snippet</div>
</div>
<div class="result">
<h2 class="result__title"><a href="https://second.com">Second</a></h2>
<div class="result__snippet">Second snippet</div>
</div>
</body>
</html>
"#;
let results = engine.parse_results(html).unwrap();
assert_eq!(results.len(), 2);
assert_eq!(results[0].url, "https://first.com");
assert_eq!(results[1].url, "https://second.com");
}
#[test]
fn test_parse_results_no_snippet() {
let engine = DuckDuckGo::new();
let html = r#"
<html>
<body>
<div class="result">
<h2 class="result__title"><a href="https://example.com">No Snippet</a></h2>
</div>
</body>
</html>
"#;
let results = engine.parse_results(html).unwrap();
assert_eq!(results.len(), 1);
assert_eq!(results[0].content, "");
}
#[test]
fn test_parse_results_skips_empty_title() {
let engine = DuckDuckGo::new();
let html = r#"
<html>
<body>
<div class="result">
<h2 class="result__title"><a href="https://example.com"></a></h2>
</div>
</body>
</html>
"#;
let results = engine.parse_results(html).unwrap();
assert!(results.is_empty());
}
#[test]
fn test_parse_results_skips_empty_url() {
let engine = DuckDuckGo::new();
let html = r#"
<html>
<body>
<div class="result">
<h2 class="result__title"><a href="">Has Title</a></h2>
</div>
</body>
</html>
"#;
let results = engine.parse_results(html).unwrap();
assert!(results.is_empty());
}
#[test]
fn test_parse_results_no_title_element() {
let engine = DuckDuckGo::new();
let html = r#"
<html>
<body>
<div class="result">
<div class="result__snippet">Orphan snippet</div>
</div>
</body>
</html>
"#;
let results = engine.parse_results(html).unwrap();
assert!(results.is_empty());
}
#[test]
fn test_extract_redirect_url_invalid_encoding() {
let url = "//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com";
let result = extract_redirect_url(url);
assert!(result.is_some());
}
}