1use lol_html::html_content::{Comment, Element};
34use lol_html::{comments, element, rewrite_str, RewriteStrSettings};
35
/// One whitelisted HTML element together with the attributes it may keep.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AllowedElement {
    /// Tag name of the permitted element, e.g. `"a"`.
    pub name: String,
    /// Names of the attributes that may stay on the element; `purifier`
    /// removes every attribute whose name is not listed here.
    pub attributes: Vec<String>,
}

/// Configuration consumed by `purifier`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Settings {
    /// Whitelist of elements (and their attributes) that are kept in the output.
    pub allowed: Vec<AllowedElement>,
    /// When `true`, HTML comments are removed from the output.
    pub remove_comments: bool,
}
45
46impl Default for Settings {
47 #[inline]
48 fn default() -> Self {
49 Settings {
50 allowed: vec![
51 AllowedElement {
52 name: "div".to_string(),
53 attributes: vec![],
54 },
55 AllowedElement {
56 name: "b".to_string(),
57 attributes: vec![],
58 },
59 AllowedElement {
60 name: "strong".to_string(),
61 attributes: vec![],
62 },
63 AllowedElement {
64 name: "i".to_string(),
65 attributes: vec![],
66 },
67 AllowedElement {
68 name: "em".to_string(),
69 attributes: vec![],
70 },
71 AllowedElement {
72 name: "u".to_string(),
73 attributes: vec![],
74 },
75 AllowedElement {
76 name: "a".to_string(),
77 attributes: vec!["href".to_string(), "title".to_string()],
78 },
79 AllowedElement {
80 name: "ul".to_string(),
81 attributes: vec![],
82 },
83 AllowedElement {
84 name: "ol".to_string(),
85 attributes: vec![],
86 },
87 AllowedElement {
88 name: "li".to_string(),
89 attributes: vec![],
90 },
91 AllowedElement {
92 name: "p".to_string(),
93 attributes: vec!["style".to_string()],
94 },
95 AllowedElement {
96 name: "br".to_string(),
97 attributes: vec![],
98 },
99 AllowedElement {
100 name: "span".to_string(),
101 attributes: vec!["style".to_string()],
102 },
103 AllowedElement {
104 name: "img".to_string(),
105 attributes: vec![
106 "width".to_string(),
107 "height".to_string(),
108 "alt".to_string(),
109 "src".to_string(),
110 ],
111 },
112 ],
113 remove_comments: true,
114 }
115 }
116}
117
118pub fn purifier(input: &str, settings: Settings) -> String {
132 let element_handler = |el: &mut Element| {
133 let find = settings.allowed.iter().find(|e| e.name.eq(&el.tag_name()));
134 match find {
135 Some(find) => {
136 let remove_attributes = el
137 .attributes()
138 .iter()
139 .filter(|e| find.attributes.iter().any(|a| a.eq(&e.name())) == false)
140 .map(|m| m.name())
141 .collect::<Vec<String>>();
142 for attr in remove_attributes {
143 el.remove_attribute(&attr);
144 }
145 }
146 None => {
147 el.remove_and_keep_content();
148 }
149 }
150 Ok(())
151 };
152 let comment_handler = |c: &mut Comment| {
153 if settings.remove_comments {
154 c.remove();
155 }
156 Ok(())
157 };
158 let output = rewrite_str(
159 input,
160 RewriteStrSettings {
161 element_content_handlers: vec![
162 element!("*", element_handler),
163 comments!("*", comment_handler),
164 ],
165 ..RewriteStrSettings::default()
166 },
167 )
168 .unwrap();
169 return output;
170}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected output shared by the tests in which no comment survives.
    const WITHOUT_COMMENT: &str = r#"<div><span style="color: black;"><a href="/test"><img src="/logo.png" />Rust</a></span></div>"#;

    /// Disallowed attributes are dropped while allowed ones are kept.
    #[test]
    fn test_purifier() {
        let input = r#"<div style="display: block;"><span style="color: black;"><a href="/test" onclick="javascript:;"><img src="/logo.png" onerror="javascript:;"/>Rust</a></span></div>"#;
        assert_eq!(purifier(input, Settings::default()), WITHOUT_COMMENT);
    }

    /// With the default settings, comments are removed from the output.
    #[test]
    fn test_purifier_remove_comments() {
        let input = r#"<div style="display: block;"><!--Comment 1--><span style="color: black;"><a href="/test" onclick="javascript:;"><img src="/logo.png" onerror="javascript:;"/>Rust</a></span></div>"#;
        assert_eq!(purifier(input, Settings::default()), WITHOUT_COMMENT);
    }

    /// With `remove_comments` disabled, comments are passed through.
    #[test]
    fn test_purifier_show_comments() {
        let keep_comments = Settings {
            remove_comments: false,
            ..Settings::default()
        };
        let input = r#"<div style="display: block;"><span style="color: black;"><!--Comment 1--><a href="/test" onclick="javascript:;"><img src="/logo.png" onerror="javascript:;"/>Rust</a></span></div>"#;
        let expected = r#"<div><span style="color: black;"><!--Comment 1--><a href="/test"><img src="/logo.png" />Rust</a></span></div>"#;
        assert_eq!(purifier(input, keep_comments), expected);
    }
}