link_preview/
html.rs

1use regex::Regex;
2use scraper::{Html, Selector};
3
4pub fn first_inner_html(html: &Html, tag: &str) -> Option<String> {
5    let selector = Selector::parse(tag).unwrap();
6
7    if let Some(element) = html.select(&selector).next() {
8        let value = element.inner_html();
9
10        if !value.is_empty() {
11            return Some(value);
12        }
13    }
14
15    None
16}
17
18pub fn find_meta_tag(html: &Html, property: &str) -> Option<String> {
19    let selector = Selector::parse(&format!("meta[property=\"{property}\"]")).unwrap();
20
21    if let Some(element) = html.select(&selector).next() {
22        if let Some(value) = element.value().attr("content") {
23            return Some(value.to_string());
24        }
25    }
26
27    None
28}
29
30pub fn find_link(html: &Html, rel: &str) -> Option<String> {
31    let selector = Selector::parse(&format!("link[rel=\"{rel}\"]")).unwrap();
32
33    if let Some(element) = html.select(&selector).next() {
34        if let Some(value) = element.value().attr("href") {
35            return Some(value.to_string());
36        }
37    }
38
39    None
40}
41
42/// Removes HTML tags from the provided HTML text
43pub fn remove_html_tags(text: &str) -> String {
44    let re = Regex::new("<(.|\n)*?>").unwrap();
45    let res = re.replace_all(text, "");
46
47    res.to_string()
48}
49
#[cfg(test)]
mod tests {
    use super::remove_html_tags;

    /// Markup is stripped while the text between the tags is kept verbatim.
    #[test]
    fn sanitizes_html_text() {
        const MARKUP: &str = "<html><body><p>Hello <b>World</b>!.<br /> This is our<sup>1st</sup> test on sanitization for HTML text</p><body></html>";
        const PLAIN_TEXT: &str = "Hello World!. This is our1st test on sanitization for HTML text";

        assert_eq!(remove_html_tags(MARKUP), PLAIN_TEXT);
    }
}