html_purifier/
lib.rs

//! # HTML Purifier
//!
//! HTML Purifier is a standard HTML filter library: it keeps only the elements and attributes
//! that are whitelisted in [`Settings`] and strips everything else from the input.
//!
//! > HTML Purifier will not only remove all malicious code (better known as XSS) with a thoroughly audited, secure yet permissive whitelist, it will also make sure your documents are standards compliant, something only achievable with a comprehensive knowledge of W3C's specifications. [HTML Purifier](http://htmlpurifier.org)
//!
//! ## Example
//!
//! ```
//! use html_purifier::{purifier, Settings};
//!
//! let settings = Settings {
//!     ..Settings::default()
//! };
//! let input = r#"<a href="/test" style="color: black;"><img src="/logo.png" onerror="javascript:;"/>Rust</a>"#;
//! let output = purifier(input, settings);
//! ```
//!
//! Input HTML
//!
//! ```html
//! <a href="/test" style="color: black;"><img src="/logo.png" onerror="javascript:;"/>Rust</a>
//! ```
//!
//! Output HTML
//!
//! ```html
//! <a href="/test"><img src="/logo.png" />Rust</a>
//! ```
32
33use lol_html::html_content::{Comment, Element};
34use lol_html::{comments, element, rewrite_str, RewriteStrSettings};
35
/// A whitelisted HTML element together with the attributes it is allowed to keep.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AllowedElement {
    /// Tag name of the element (for example `"a"`); matched by exact string comparison.
    pub name: String,
    /// Names of the attributes that are kept on this element; all others are removed.
    pub attributes: Vec<String>,
}

/// Configuration for [`purifier`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Settings {
    /// Elements that survive purification; any element not listed here has its tags removed.
    pub allowed: Vec<AllowedElement>,
    /// When `true`, HTML comments are removed from the output.
    pub remove_comments: bool,
}
45
46impl Default for Settings {
47    #[inline]
48    fn default() -> Self {
49        Settings {
50            allowed: vec![
51                AllowedElement {
52                    name: "div".to_string(),
53                    attributes: vec![],
54                },
55                AllowedElement {
56                    name: "b".to_string(),
57                    attributes: vec![],
58                },
59                AllowedElement {
60                    name: "strong".to_string(),
61                    attributes: vec![],
62                },
63                AllowedElement {
64                    name: "i".to_string(),
65                    attributes: vec![],
66                },
67                AllowedElement {
68                    name: "em".to_string(),
69                    attributes: vec![],
70                },
71                AllowedElement {
72                    name: "u".to_string(),
73                    attributes: vec![],
74                },
75                AllowedElement {
76                    name: "a".to_string(),
77                    attributes: vec!["href".to_string(), "title".to_string()],
78                },
79                AllowedElement {
80                    name: "ul".to_string(),
81                    attributes: vec![],
82                },
83                AllowedElement {
84                    name: "ol".to_string(),
85                    attributes: vec![],
86                },
87                AllowedElement {
88                    name: "li".to_string(),
89                    attributes: vec![],
90                },
91                AllowedElement {
92                    name: "p".to_string(),
93                    attributes: vec!["style".to_string()],
94                },
95                AllowedElement {
96                    name: "br".to_string(),
97                    attributes: vec![],
98                },
99                AllowedElement {
100                    name: "span".to_string(),
101                    attributes: vec!["style".to_string()],
102                },
103                AllowedElement {
104                    name: "img".to_string(),
105                    attributes: vec![
106                        "width".to_string(),
107                        "height".to_string(),
108                        "alt".to_string(),
109                        "src".to_string(),
110                    ],
111                },
112            ],
113            remove_comments: true,
114        }
115    }
116}
117
118/// HTML Purifier
119///
120/// # Example
121///
122/// ```
123/// use html_purifier::{purifier, Settings};
124///
125/// let settings = Settings {
126///     ..Settings::default()
127/// };
128/// let input = r#"<a href="/test" style="color: black;"><img src="/logo.png" onerror="javascript:;"/>Rust</a>"#;
129/// let output = purifier(input, settings);
130/// ```
131pub fn purifier(input: &str, settings: Settings) -> String {
132    let element_handler = |el: &mut Element| {
133        let find = settings.allowed.iter().find(|e| e.name.eq(&el.tag_name()));
134        match find {
135            Some(find) => {
136                let remove_attributes = el
137                    .attributes()
138                    .iter()
139                    .filter(|e| find.attributes.iter().any(|a| a.eq(&e.name())) == false)
140                    .map(|m| m.name())
141                    .collect::<Vec<String>>();
142                for attr in remove_attributes {
143                    el.remove_attribute(&attr);
144                }
145            }
146            None => {
147                el.remove_and_keep_content();
148            }
149        }
150        Ok(())
151    };
152    let comment_handler = |c: &mut Comment| {
153        if settings.remove_comments {
154            c.remove();
155        }
156        Ok(())
157    };
158    let output = rewrite_str(
159        input,
160        RewriteStrSettings {
161            element_content_handlers: vec![
162                element!("*", element_handler),
163                comments!("*", comment_handler),
164            ],
165            ..RewriteStrSettings::default()
166        },
167    )
168    .unwrap();
169    return output;
170}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Attributes that are not whitelisted for their element are stripped.
    #[test]
    fn test_purifier() {
        let input = r#"<div style="display: block;"><span style="color: black;"><a href="/test" onclick="javascript:;"><img src="/logo.png" onerror="javascript:;"/>Rust</a></span></div>"#;
        let output = purifier(input, Settings::default());
        assert_eq!(
            output,
            r#"<div><span style="color: black;"><a href="/test"><img src="/logo.png" />Rust</a></span></div>"#
        );
    }

    /// With the default settings, comments are removed from the output.
    #[test]
    fn test_purifier_remove_comments() {
        let input = r#"<div style="display: block;"><!--Comment 1--><span style="color: black;"><a href="/test" onclick="javascript:;"><img src="/logo.png" onerror="javascript:;"/>Rust</a></span></div>"#;
        let output = purifier(input, Settings::default());
        assert_eq!(
            output,
            r#"<div><span style="color: black;"><a href="/test"><img src="/logo.png" />Rust</a></span></div>"#
        );
    }

    /// With `remove_comments: false`, comments are passed through untouched.
    #[test]
    fn test_purifier_show_comments() {
        let settings = Settings {
            remove_comments: false,
            ..Settings::default()
        };
        let input = r#"<div style="display: block;"><span style="color: black;"><!--Comment 1--><a href="/test" onclick="javascript:;"><img src="/logo.png" onerror="javascript:;"/>Rust</a></span></div>"#;
        let output = purifier(input, settings);
        assert_eq!(
            output,
            r#"<div><span style="color: black;"><!--Comment 1--><a href="/test"><img src="/logo.png" />Rust</a></span></div>"#
        );
    }

    /// Elements that are not whitelisted lose their tags but keep their content.
    #[test]
    fn test_purifier_unwraps_disallowed_elements() {
        let input = r#"<div><h1>Title</h1></div>"#;
        let output = purifier(input, Settings::default());
        assert_eq!(output, r#"<div>Title</div>"#);
    }
}