Skip to main content

websearch/
extract.rs

1//! Parse DuckDuckGo Lite's table layout into structured results.
2
3use scraper::{Html, Selector};
4use url::Url;
5
6use super::types::SearchResult;
7use crate::compress::compress_text;
8
9/// Resolve the real destination URL from a DDG Lite result href.
10///
11/// DDG Lite wraps targets in a redirect like
12/// `//duckduckgo.com/l/?uddg=<percent-encoded-url>&rut=…`. We pull the
13/// `uddg` parameter back out (already percent-decoded by the URL parser).
14/// Protocol-relative hrefs (`//host/path`) get an `https:` scheme; anything
15/// already absolute is returned unchanged.
16pub fn resolve_result_url(href: &str) -> String {
17    let href = href.trim();
18    if href.is_empty() {
19        return String::new();
20    }
21
22    // Normalize protocol-relative URLs so they can be parsed.
23    let absolute = if let Some(stripped) = href.strip_prefix("//") {
24        format!("https://{stripped}")
25    } else {
26        href.to_string()
27    };
28
29    if let Ok(parsed) = Url::parse(&absolute) {
30        if let Some((_, target)) = parsed.query_pairs().find(|(k, _)| k == "uddg") {
31            return target.into_owned();
32        }
33        return parsed.to_string();
34    }
35
36    absolute
37}
38
39pub fn parse_ddg_lite(html: &str, max_results: usize) -> Vec<SearchResult> {
40    let document = Html::parse_document(html);
41
42    // The `<a class="result-link">` carries title + href; the matching
43    // `<td class="result-snippet">` holds the snippet. They appear in result
44    // order, so zipping by index pairs them up.
45    let link_selector = Selector::parse("a.result-link").unwrap();
46    let snippet_selector = Selector::parse(".result-snippet").unwrap();
47
48    let links = document.select(&link_selector);
49    let mut snippets = document.select(&snippet_selector);
50
51    let mut results = Vec::new();
52    for (i, link) in links.enumerate() {
53        if i >= max_results {
54            break;
55        }
56
57        let title = compress_text(&link.text().collect::<String>());
58        let url = resolve_result_url(link.value().attr("href").unwrap_or(""));
59        let snippet = snippets
60            .next()
61            .map(|s| compress_text(&s.text().collect::<String>()))
62            .unwrap_or_default();
63
64        if title.is_empty() && url.is_empty() {
65            continue;
66        }
67
68        results.push(SearchResult {
69            title,
70            snippet,
71            url,
72            ref_index: results.len() + 1,
73        });
74    }
75
76    results
77}