html_to_markdown_rs/
sanitizer.rs1use ammonia::Builder;
4
5use crate::error::Result;
6use crate::options::{PreprocessingOptions, PreprocessingPreset};
7
8pub fn sanitize(html: &str, options: &PreprocessingOptions) -> Result<String> {
13 use std::collections::HashSet;
14
15 let mut builder = match options.preset {
16 PreprocessingPreset::Minimal => create_minimal_builder(),
17 PreprocessingPreset::Standard => create_standard_builder(),
18 PreprocessingPreset::Aggressive => create_aggressive_builder(),
19 };
20
21 let mut clean_content = HashSet::new();
22 let mut allowed_tags = builder.clone_tags();
23
24 clean_content.insert("script");
25 clean_content.insert("style");
26 allowed_tags.remove("script");
27 allowed_tags.remove("style");
28
29 if options.remove_navigation {
30 clean_content.insert("nav");
31 clean_content.insert("aside");
32 clean_content.insert("header");
33 clean_content.insert("footer");
34 allowed_tags.remove("nav");
35 allowed_tags.remove("aside");
36 allowed_tags.remove("header");
37 allowed_tags.remove("footer");
38 }
39
40 if options.remove_forms {
41 clean_content.insert("form");
42 clean_content.insert("input");
43 clean_content.insert("button");
44 clean_content.insert("select");
45 clean_content.insert("textarea");
46 clean_content.insert("label");
47 clean_content.insert("fieldset");
48 clean_content.insert("legend");
49 allowed_tags.remove("form");
50 allowed_tags.remove("input");
51 allowed_tags.remove("button");
52 allowed_tags.remove("select");
53 allowed_tags.remove("textarea");
54 allowed_tags.remove("label");
55 allowed_tags.remove("fieldset");
56 allowed_tags.remove("legend");
57 }
58
59 builder.tags(allowed_tags);
60 builder.clean_content_tags(clean_content);
61
62 Ok(builder.clean(html).to_string())
63}
64
65fn create_minimal_builder() -> Builder<'static> {
67 let mut builder = Builder::default();
68 builder.strip_comments(false);
69 builder
70}
71
72fn create_standard_builder() -> Builder<'static> {
74 let mut builder = Builder::default();
75 builder.strip_comments(true);
76 builder
77}
78
79fn create_aggressive_builder() -> Builder<'static> {
81 let mut builder = Builder::default();
82 builder.strip_comments(true);
83 builder.link_rel(Some("nofollow noopener noreferrer"));
84 builder
85}