Skip to main content

matrixcode_core/tools/websearch/
parser.rs

1//! Search result types and parser trait
2
3use serde::{Deserialize, Serialize};
4
5/// A single search result.
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct SearchResult {
8    pub title: String,
9    pub url: String,
10    pub snippet: Option<String>,
11}
12
13/// Trait for parsing search results from different backends
14pub trait SearchResultParser {
15    /// Parse results from raw response text
16    fn parse(&self, content: &str, max_results: usize) -> Vec<SearchResult>;
17}
18
19/// Helper to clean DuckDuckGo redirect URLs
20pub fn clean_url(url: &str) -> String {
21    if url.contains("duckduckgo.com/l/")
22        && let Some(query) = url.split("uddg=").nth(1)
23        && let Some(encoded) = query.split('&').next()
24    {
25        return urlencoding::decode(encoded).unwrap_or_default().into_owned();
26    }
27    url.to_string()
28}
29
/// Strip HTML tags and decode a small set of common HTML entities.
///
/// Every `<...>` span (a `<` up to and including the next `>`) is removed.
/// A `<` with no later `>` is left in place, so text such as `1 < 2` is
/// preserved. The entities `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&nbsp;` and
/// `&amp;` are then decoded (`&nbsp;` becomes a regular space), and leading
/// and trailing whitespace is trimmed from the result.
///
/// Entities are decoded exactly once: `&amp;lt;` yields the literal text
/// `&lt;`, not `<`.
pub fn strip_html_tags(s: &str) -> String {
    // Remove tags with a plain scan instead of compiling a regex on every
    // call; this matches the same spans as the pattern `<[^>]*>`.
    let mut without_tags = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(open) = rest.find('<') {
        match rest[open..].find('>') {
            Some(close) => {
                without_tags.push_str(&rest[..open]);
                // `close` is relative to `open`; `>` is one byte wide.
                rest = &rest[open + close + 1..];
            }
            // No closing `>` anywhere after this `<`: nothing left to strip.
            None => break,
        }
    }
    without_tags.push_str(rest);

    without_tags
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#39;", "'")
        .replace("&nbsp;", " ")
        // `&amp;` must be decoded last: decoding it first would turn
        // `&amp;lt;` into `&lt;` and then wrongly into `<`.
        .replace("&amp;", "&")
        .trim()
        .to_string()
}