1use std::collections::HashSet;
2
3use base64::{engine::general_purpose, Engine};
4use scraper::{Html, Selector};
5use sha2::{Digest, Sha256};
6
7pub fn csp_hashes_from_html_document(html: &str) -> Result<HashSet<String>, String> {
9 let doc = Html::parse_document(html);
10
11 let script = Selector::parse("script").expect("could not instantiate script selector");
13 let mut hashes = doc
14 .select(&script)
15 .map(|s| {
16 let mut hasher = Sha256::new();
17 hasher.update(s.inner_html().as_bytes());
18 let hash = hasher.finalize();
19 let b64 = general_purpose::STANDARD.encode(hash);
20 format!("sha256-{b64}")
21 })
22 .collect::<HashSet<String>>();
23
24 let style = Selector::parse("style").expect("could not instantiate script selector");
26 hashes.extend(doc.select(&style).map(|s| {
27 let mut hasher = Sha256::new();
28 hasher.update(s.inner_html().as_bytes());
29 let hash = hasher.finalize();
30 let b64 = general_purpose::STANDARD.encode(hash);
31 format!("sha256-{b64}")
32 }));
33
34 if let Some(error) = doc.errors.first() {
35 Err(error.to_string())
36 } else {
37 Ok(hashes)
38 }
39}
40
// Unit tests for `csp_hashes_from_html_document`.
//
// NOTE: the expected `sha256-...` values are digests of the element contents
// embedded in the raw-string fixtures below, so they depend on the exact
// bytes (including whitespace) between the opening and closing tags. Do not
// reformat the fixtures without regenerating the hashes.
41#[cfg(test)]
42mod tests {
43 use super::*;
44
 // A well-formed document without any <script> or <style> yields an empty set.
45 #[test]
46 fn valid_html_no_scripts() {
47 assert!(
48 csp_hashes_from_html_document(r#"<!doctype html><title>a</title>"#)
49 .unwrap()
50 .is_empty()
51 );
52 }
53
 // Inline scripts in both <head> and <body> are each hashed; the result is
 // compared as a set, so ordering is irrelevant.
54 #[test]
55 fn valid_html_with_scripts() {
56 assert_eq!(
57 csp_hashes_from_html_document(
58 r#"<!doctype html>
59 <html>
60 <head>
61 <title>woof</title>
62 <script>console.log("in head")</script>
63 </head>
64 <body>
65 <script>console.log("in body")</script>
66 </body>
67 </html>"#
68 )
69 .unwrap(),
70 HashSet::from_iter(vec![
71 "sha256-vjwjwnBndhWG+ZN6vpRKSmbicObZIQarx7RgSb3DmA8=".to_string(),
72 "sha256-3oUpClVK/cNQB5x9TStM+xLiHETuIGGp2vGZRQdvHX0=".to_string()
73 ])
74 );
75 }
76
 // The fixture has an unterminated `<head` tag; the function is expected to
 // surface a parse error instead of returning hashes.
77 #[test]
78 fn invalid_html_errors() {
79 assert!(csp_hashes_from_html_document(
80 r#"<!doctype html>
81 <html>
82 <head
83 <script>console.log("in head")</script>
84 </head>
85 <body>
86 <script>console.log("in body")</script>
87 </body>
88 </html>"#
89 )
90 .is_err());
91 }
92
 // A bare <body> fragment is expected to be rejected with an error
 // (presumably because the doctype is missing -- the test only checks is_err).
93 #[test]
94 fn html_fragment_errors() {
95 assert!(csp_hashes_from_html_document(
96 r#"<body>
97 <script>console.log("in body")</script>
98 </body>"#
99 )
100 .is_err());
101 }
102
 // A single multi-line <style> block produces exactly one hash.
103 #[test]
104 fn valid_html_with_style() {
105 assert_eq!(
106 csp_hashes_from_html_document(
107 r#"<!doctype html>
108 <html>
109 <head>
110 <title>woof</title>
111 <style>
112 body {
113 font-family: cursive;
114 }
115 </style>
116 </head>
117 <body>
118 bonjour
119 </body>
120 </html>"#
121 )
122 .unwrap(),
123 HashSet::from_iter(vec![
124 "sha256-4+QNL+2odf47+35bV9by29lQ0daJMNTQRSLy7iRe3uI=".to_string()
125 ])
126 );
127 }
128
 // Two scripts plus two style blocks in one document yield four hashes in a
 // single combined set.
129 #[test]
130 fn valid_html_with_style_and_script() {
131 assert_eq!(
132 csp_hashes_from_html_document(
133 r#"<!doctype html>
134 <html>
135 <head>
136 <title>woof</title>
137 <script>console.log("in head")</script>
138 <style> a { color: red } </style>
139 <style>
140 body {
141 font-family: cursive;
142 }
143 </style>
144 </head>
145 <body>
146 <script>console.log("in body")</script>
147 </body>
148 </html>"#
149 )
150 .unwrap(),
151 HashSet::from_iter(vec![
152 "sha256-4+QNL+2odf47+35bV9by29lQ0daJMNTQRSLy7iRe3uI=".to_string(),
153 "sha256-3oUpClVK/cNQB5x9TStM+xLiHETuIGGp2vGZRQdvHX0=".to_string(),
154 "sha256-vjwjwnBndhWG+ZN6vpRKSmbicObZIQarx7RgSb3DmA8=".to_string(),
155 "sha256-n1Yam9K1WJihP5yKcNNfCE/P1LaxJQmuUucwWsXrHWg=".to_string()
156 ])
157 );
158 }
159}