This crate is a simple HTML sanitizer, built on top of html5ever.
With this crate, you decide for every HTML tag how it should be sanitized. This is done through the `Tag` struct that is passed to your callback for each HTML tag.
use std::fs::File;
use html_sanitizer::TagParser;
/// Example entry point: runs `your_html_document.html` through a [`TagParser`]
/// and prints what `walk` returns.
///
/// The closure handed to `walk` receives each tag and picks an action based
/// on the tag's name:
///
/// * `html`, `body` — `ignore_self()`
/// * `head`, `script`, `style` — `ignore_self_and_contents()`
/// * `a` — allow the `href` attribute
/// * `img` — rewrite as `<b>Images not allowed</b>`
/// * anything else — allow the `style` attribute
///
/// # Panics
///
/// Panics if `your_html_document.html` cannot be opened.
fn main() {
    // Opening a file is fallible I/O: fail with a message that names the file
    // instead of a bare `unwrap()` panic.
    let mut file = File::open("your_html_document.html")
        .expect("failed to open your_html_document.html");
    let mut tag_parser = TagParser::new(&mut file);

    let result = tag_parser.walk(|tag| {
        if tag.name == "html" || tag.name == "body" {
            // Presumably drops the wrapper tag itself while keeping its children.
            tag.ignore_self();
        } else if tag.name == "head" || tag.name == "script" || tag.name == "style" {
            // Presumably drops the tag together with everything nested inside it.
            tag.ignore_self_and_contents();
        } else if tag.name == "a" {
            tag.allow_attribute(String::from("href"));
        } else if tag.name == "img" {
            tag.rewrite_as(String::from("<b>Images not allowed</b>"));
        } else {
            tag.allow_attribute(String::from("style"));
        }
    });

    // The walk result was previously computed and then discarded.
    // NOTE(review): assumes `walk` returns the sanitized HTML as a `String`
    // (or another `Display` type) — confirm against `TagParser::walk`.
    println!("{}", result);
}