1use regex::Regex;
2use scraper::{Html, Selector};
3
4pub fn first_inner_html(html: &Html, tag: &str) -> Option<String> {
5 let selector = Selector::parse(tag).unwrap();
6
7 if let Some(element) = html.select(&selector).next() {
8 let value = element.inner_html();
9
10 if !value.is_empty() {
11 return Some(value);
12 }
13 }
14
15 None
16}
17
18pub fn find_meta_tag(html: &Html, property: &str) -> Option<String> {
19 let selector = Selector::parse(&format!("meta[property=\"{property}\"]")).unwrap();
20
21 if let Some(element) = html.select(&selector).next() {
22 if let Some(value) = element.value().attr("content") {
23 return Some(value.to_string());
24 }
25 }
26
27 None
28}
29
30pub fn find_link(html: &Html, rel: &str) -> Option<String> {
31 let selector = Selector::parse(&format!("link[rel=\"{rel}\"]")).unwrap();
32
33 if let Some(element) = html.select(&selector).next() {
34 if let Some(value) = element.value().attr("href") {
35 return Some(value.to_string());
36 }
37 }
38
39 None
40}
41
42pub fn remove_html_tags(text: &str) -> String {
44 let re = Regex::new("<(.|\n)*?>").unwrap();
45 let res = re.replace_all(text, "");
46
47 res.to_string()
48}
49
#[cfg(test)]
mod tests {
    use super::remove_html_tags;

    /// Paired, nested and self-closing tags are all dropped; only the text between them stays.
    #[test]
    fn sanitizes_html_text() {
        let input = "<html><body><p>Hello <b>World</b>!.<br /> This is our<sup>1st</sup> test on sanitization for HTML text</p><body></html>";
        let expected = "Hello World!. This is our1st test on sanitization for HTML text";

        assert_eq!(remove_html_tags(input), expected);
    }
}