csp_hashes/
lib.rs

1use std::collections::HashSet;
2
3use base64::{engine::general_purpose, Engine};
4use scraper::{Html, Selector};
5use sha2::{Digest, Sha256};
6
7/// Hashes inline <script> and <style> elements using SHA256. Returns an error if input is not a valid HTML document.
8pub fn csp_hashes_from_html_document(html: &str) -> Result<HashSet<String>, String> {
9    let doc = Html::parse_document(html);
10
11    // script elements
12    let script = Selector::parse("script").expect("could not instantiate script selector");
13    let mut hashes = doc
14        .select(&script)
15        .map(|s| {
16            let mut hasher = Sha256::new();
17            hasher.update(s.inner_html().as_bytes());
18            let hash = hasher.finalize();
19            let b64 = general_purpose::STANDARD.encode(hash);
20            format!("sha256-{b64}")
21        })
22        .collect::<HashSet<String>>();
23
24    // style elements
25    let style = Selector::parse("style").expect("could not instantiate script selector");
26    hashes.extend(doc.select(&style).map(|s| {
27        let mut hasher = Sha256::new();
28        hasher.update(s.inner_html().as_bytes());
29        let hash = hasher.finalize();
30        let b64 = general_purpose::STANDARD.encode(hash);
31        format!("sha256-{b64}")
32    }));
33
34    if let Some(error) = doc.errors.first() {
35        Err(error.to_string())
36    } else {
37        Ok(hashes)
38    }
39}
40
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected set of hash sources from string literals.
    fn hash_set(sources: &[&str]) -> HashSet<String> {
        sources.iter().map(|source| source.to_string()).collect()
    }

    /// A valid document without inline scripts or styles produces no hashes.
    #[test]
    fn valid_html_no_scripts() {
        let hashes = csp_hashes_from_html_document(r#"<!doctype html><title>a</title>"#).unwrap();

        assert!(hashes.is_empty());
    }

    /// Inline scripts in both `<head>` and `<body>` are hashed.
    #[test]
    fn valid_html_with_scripts() {
        let html = r#"<!doctype html>
                <html>
                    <head>
                        <title>woof</title>
                        <script>console.log("in head")</script>
                    </head>
                    <body>
                        <script>console.log("in body")</script>
                    </body>
                </html>"#;

        let expected = hash_set(&[
            "sha256-vjwjwnBndhWG+ZN6vpRKSmbicObZIQarx7RgSb3DmA8=",
            "sha256-3oUpClVK/cNQB5x9TStM+xLiHETuIGGp2vGZRQdvHX0=",
        ]);

        assert_eq!(csp_hashes_from_html_document(html).unwrap(), expected);
    }

    /// A document with an unterminated `<head` tag is rejected.
    #[test]
    fn invalid_html_errors() {
        let html = r#"<!doctype html>
                <html>
                    <head
                        <script>console.log("in head")</script>
                    </head>
                    <body>
                        <script>console.log("in body")</script>
                    </body>
                </html>"#;

        let result = csp_hashes_from_html_document(html);

        assert!(result.is_err());
    }

    /// A bare `<body>` fragment (no doctype, no `<html>` wrapper) is rejected.
    #[test]
    fn html_fragment_errors() {
        let html = r#"<body>
                        <script>console.log("in body")</script>
                    </body>"#;

        let result = csp_hashes_from_html_document(html);

        assert!(result.is_err());
    }

    /// An inline `<style>` element is hashed, surrounding whitespace included.
    #[test]
    fn valid_html_with_style() {
        let html = r#"<!doctype html>
                <html>
                    <head>
                        <title>woof</title>
                        <style>
                            body {
                                font-family: cursive;
                            }
                        </style>
                    </head>
                    <body>
                        bonjour
                    </body>
                </html>"#;

        let expected = hash_set(&["sha256-4+QNL+2odf47+35bV9by29lQ0daJMNTQRSLy7iRe3uI="]);

        assert_eq!(csp_hashes_from_html_document(html).unwrap(), expected);
    }

    /// Scripts and styles in the same document all end up in one set.
    #[test]
    fn valid_html_with_style_and_script() {
        let html = r#"<!doctype html>
                <html>
                    <head>
                        <title>woof</title>
                        <script>console.log("in head")</script>
                        <style> a { color: red } </style>
                        <style>
                            body {
                                font-family: cursive;
                            }
                        </style>
                    </head>
                    <body>
                        <script>console.log("in body")</script>
                    </body>
                </html>"#;

        let expected = hash_set(&[
            "sha256-4+QNL+2odf47+35bV9by29lQ0daJMNTQRSLy7iRe3uI=",
            "sha256-3oUpClVK/cNQB5x9TStM+xLiHETuIGGp2vGZRQdvHX0=",
            "sha256-vjwjwnBndhWG+ZN6vpRKSmbicObZIQarx7RgSb3DmA8=",
            "sha256-n1Yam9K1WJihP5yKcNNfCE/P1LaxJQmuUucwWsXrHWg=",
        ]);

        assert_eq!(csp_hashes_from_html_document(html).unwrap(), expected);
    }
}