Skip to main content

tail_fin_common/
html.rs

1/// Strip HTML tags from a string, returning plain text.
2pub fn strip_html(s: &str) -> String {
3    let mut result = String::with_capacity(s.len());
4    let mut in_tag = false;
5    for ch in s.chars() {
6        match ch {
7            '<' => in_tag = true,
8            '>' => in_tag = false,
9            _ if !in_tag => result.push(ch),
10            _ => {}
11        }
12    }
13    html_decode(&result)
14}
15
/// Remove CDATA wrappers from a string.
///
/// Every `<![CDATA[` opening marker is deleted first, then every `]]>`
/// closing marker, and finally surrounding whitespace is trimmed from the
/// result.
pub fn strip_cdata(s: &str) -> String {
    const OPEN_MARKER: &str = "<![CDATA[";
    const CLOSE_MARKER: &str = "]]>";

    let without_open = s.replace(OPEN_MARKER, "");
    let unwrapped = without_open.replace(CLOSE_MARKER, "");
    String::from(unwrapped.trim())
}
23
/// Decode common HTML entities.
///
/// Recognised entities are `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;` and
/// `&nbsp;` (decoded to a plain ASCII space). Anything else, including a bare
/// `&`, is copied through unchanged.
///
/// The input is scanned once from left to right, so the output of one
/// replacement is never decoded again: `&amp;lt;` becomes `&lt;`, not `<`.
pub fn html_decode(s: &str) -> String {
    const ENTITIES: [(&str, char); 6] = [
        ("&amp;", '&'),
        ("&lt;", '<'),
        ("&gt;", '>'),
        ("&quot;", '"'),
        ("&#39;", '\''),
        ("&nbsp;", ' '),
    ];

    let mut decoded = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(pos) = rest.find('&') {
        // Copy the literal run before the ampersand verbatim.
        decoded.push_str(&rest[..pos]);
        rest = &rest[pos..];
        match ENTITIES.iter().find(|(name, _)| rest.starts_with(*name)) {
            Some(&(name, ch)) => {
                decoded.push(ch);
                rest = &rest[name.len()..];
            }
            None => {
                // Not a known entity: keep the `&` and continue after it.
                decoded.push('&');
                rest = &rest[1..];
            }
        }
    }
    decoded.push_str(rest);
    decoded
}
33
34#[cfg(test)]
35mod tests {
36    use super::*;
37
38    #[test]
39    fn test_strip_html_basic() {
40        assert_eq!(strip_html("<p>Hello <b>world</b></p>"), "Hello world");
41    }
42
43    #[test]
44    fn test_strip_html_entities() {
45        assert_eq!(strip_html("&amp; &lt;test&gt;"), "& <test>");
46    }
47
48    #[test]
49    fn test_strip_html_no_tags() {
50        assert_eq!(strip_html("plain text"), "plain text");
51    }
52
53    #[test]
54    fn test_strip_cdata() {
55        assert_eq!(strip_cdata("<![CDATA[Hello]]>"), "Hello");
56    }
57
58    #[test]
59    fn test_html_decode() {
60        assert_eq!(html_decode("&amp; &quot;hi&quot;"), "& \"hi\"");
61    }
62}