tiny_clean/
java_script_encoder.rs1use crate::common::{char_bucket, char_mask, encode_as_hex_byte, encode_as_unicode, dump_masks_to_ascii};
2
/// Output context that a [`JavaScriptEncoder`] escapes for.
///
/// The mode only selects which ASCII punctuation is escaped and how quotes
/// are written; see the per-variant notes, which mirror the mask setup in
/// `JavaScriptEncoder::new`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JavaScriptEncoderMode {
    /// Quotes are written as backslash + quote; `/`, `-` and `&` are not
    /// added to the escape set.
    Source,
    /// Like [`Source`](Self::Source), but `/`, `-` and `&` are escaped too.
    Block,
    /// `/`, `-` and `&` are escaped, and quotes are written as hex escapes
    /// instead of backslash + quote.
    Html,
    /// `&` is escaped and quotes are written as hex escapes; `/` and `-` are
    /// not added to the escape set.
    Attribute,
}
10
/// Escapes text according to a [`JavaScriptEncoderMode`].
///
/// Construct one with `JavaScriptEncoder::new` and reuse it: the escape
/// tables are computed once at construction time and `encode` only reads them.
#[derive(Debug, Clone)]
pub struct JavaScriptEncoder {
    /// When `true`, every non-ASCII character is escaped as well.
    ascii_only: bool,
    /// Per-bucket bit sets of ASCII characters; a set bit means the character
    /// is copied to the output unchanged, a cleared bit means it is escaped.
    valid_masks: [u32; 4],
    /// When `true`, `'` and `"` are written as hex escapes rather than as a
    /// backslash followed by the quote.
    hex_encode_quotes: bool,
}
16
17impl JavaScriptEncoder {
18 pub fn new(mode: JavaScriptEncoderMode, ascii_only: bool) -> Self {
19 let mut valid_masks = [
20 0,
21 u32::MAX & !(char_mask('\'') | char_mask('"')),
22 u32::MAX & !char_mask('\\'),
23 if ascii_only {
24 u32::MAX & !char_mask(127 as char)
25 } else {
26 u32::MAX
27 },
28 ];
29 if mode == JavaScriptEncoderMode::Block || mode == JavaScriptEncoderMode::Html {
31 valid_masks[1] &= !(char_mask('/') | char_mask('-'));
32 }
33
34 if mode != JavaScriptEncoderMode::Source {
36 valid_masks[1] &= !char_mask('&');
37 }
38
39 if cfg!(debug_assertions) {
40 dump_masks_to_ascii(&valid_masks);
41 }
42
43 let hex_encode_quotes = mode == JavaScriptEncoderMode::Attribute || mode == JavaScriptEncoderMode::Html;
44 JavaScriptEncoder {
45 ascii_only,
46 valid_masks,
47 hex_encode_quotes,
48 }
49 }
50 const LINE_SEPARATOR: char = '\u{2028}';
51 const PARAGRAPH_SEPARATOR: char = '\u{2029}';
52
53 pub fn encode(&self, input: &str) -> String {
54 let starting_capacity = (u32::MAX / 2).min((input.len() * 6) as u32) as usize;
55 let mut result = String::with_capacity(starting_capacity);
56 for c in input.chars() {
57 if c as u32 <= 127 {
58 let mask_index = char_bucket(c);
59 let character_mask = char_mask(c);
60
61 if (self.valid_masks[mask_index] & character_mask) == 0 {
62 match c {
63 '\u{0008}' => {
64 result.push_str("\\b");
65 continue;
66 }
67 '\u{0009}' => {
68 result.push_str("\\t");
69 continue;
70 }
71 '\u{000a}' => {
72 result.push_str("\\n");
73 continue;
74 }
75 '\u{000c}' => {
76 result.push_str("\\f");
77 continue;
78 }
79 '\u{000d}' => {
80 result.push_str("\\r");
81 continue;
82 }
83 '\'' | '"' => {
84 if self.hex_encode_quotes {
85 encode_as_hex_byte('\\', &mut result, c);
86 continue;
87 } else {
88 result.push('\\');
89 result.push(c);
90 continue;
91 }
92 }
93 '\\' | '/' | '-' => {
94 result.push('\\');
95 result.push(c);
96 continue;
97 }
98 _ => {
99 encode_as_hex_byte('\\', &mut result, c);
100 continue;
101 }
102 }
103 }
104 } else if self.ascii_only || c == Self::LINE_SEPARATOR || c == Self::PARAGRAPH_SEPARATOR
105 {
106 if c as u32 <= 0xFF {
107 encode_as_hex_byte('\\', &mut result, c);
108 continue;
109 } else {
110 encode_as_unicode('\\', &mut result, c);
111 continue;
112 }
113 }
114 result.push(c);
115 }
116
117 result.shrink_to_fit();
118 result
119 }
120}
121
#[cfg(test)]
mod test {
    use super::{JavaScriptEncoder, JavaScriptEncoderMode};

    /// Expectations that hold for every mode and for both charset settings.
    fn generic_tests(encoder: &JavaScriptEncoder) {
        assert_eq!("", encoder.encode(""));
        assert_eq!("\\b", encoder.encode("\u{8}"));
        assert_eq!("\\t", encoder.encode("\t"));
        assert_eq!("\\n", encoder.encode("\n"));
        assert_eq!("\\f", encoder.encode("\u{c}"));
        assert_eq!("\\r", encoder.encode("\r"));
        assert_eq!("\\\\", encoder.encode("\\"));
        assert_eq!("\\x00", encoder.encode("\u{0000}"));
        assert_eq!("\\u2028", encoder.encode("\u{2028}"));
        assert_eq!("\\u2029", encoder.encode("\u{2029}"));
        assert_eq!("abcd", encoder.encode("abcd"));
        assert_eq!("ABCD", encoder.encode("ABCD"));
        // Escaped and plain characters mixed in one input.
        assert_eq!("a\\nb", encoder.encode("a\nb"));
    }

    /// Expectations for encoders built with `ascii_only == true`.
    fn ascii_only_tests(encoder: &JavaScriptEncoder) {
        assert_eq!("\\u1234", encoder.encode("\u{1234}"));
        assert_eq!("\\xff", encoder.encode("\u{ff}"));
        assert_eq!("\\x7f", encoder.encode("\u{7f}"));
    }

    /// Expectations for encoders built with `ascii_only == false`.
    fn ascii_extended_tests(encoder: &JavaScriptEncoder) {
        assert_eq!("\u{00ff}", encoder.encode("\u{00ff}"));
        assert_eq!("\u{7f}", encoder.encode("\u{7f}"));
    }

    #[test]
    fn t_java_script_block_ascii_only() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Block, true);
        assert_eq!("\\\"", encoder.encode("\""));
        assert_eq!("\\'", encoder.encode("'"));
        assert_eq!("\\/", encoder.encode("/"));
        assert_eq!("\\-", encoder.encode("-"));
        assert_eq!("\\x26", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_only_tests(&encoder);
    }

    #[test]
    fn t_java_script_block_ascii_extended() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Block, false);
        assert_eq!("\\\"", encoder.encode("\""));
        assert_eq!("\\'", encoder.encode("'"));
        assert_eq!("\\x26", encoder.encode("&"));
        assert_eq!("\\/", encoder.encode("/"));
        assert_eq!("\\-", encoder.encode("-"));
        generic_tests(&encoder);
        ascii_extended_tests(&encoder);
    }

    #[test]
    fn t_java_script_source_ascii_only() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Source, true);
        assert_eq!("\\\"", encoder.encode("\""));
        assert_eq!("\\'", encoder.encode("'"));
        assert_eq!("/", encoder.encode("/"));
        assert_eq!("&", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_only_tests(&encoder);
    }

    #[test]
    fn t_java_script_source_ascii_extended() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Source, false);
        assert_eq!("\\\"", encoder.encode("\""));
        assert_eq!("\\'", encoder.encode("'"));
        assert_eq!("/", encoder.encode("/"));
        assert_eq!("&", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_extended_tests(&encoder);
    }

    #[test]
    fn t_java_script_html_ascii_only() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Html, true);
        assert_eq!("\\x22", encoder.encode("\""));
        assert_eq!("\\x27", encoder.encode("'"));
        assert_eq!("\\/", encoder.encode("/"));
        assert_eq!("\\-", encoder.encode("-"));
        assert_eq!("\\x26", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_only_tests(&encoder);
    }

    #[test]
    fn t_java_script_html_ascii_extended() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Html, false);
        assert_eq!("\\x22", encoder.encode("\""));
        assert_eq!("\\x27", encoder.encode("'"));
        assert_eq!("\\/", encoder.encode("/"));
        assert_eq!("\\-", encoder.encode("-"));
        assert_eq!("\\x26", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_extended_tests(&encoder);
    }

    #[test]
    fn t_java_script_attribute_ascii_only() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Attribute, true);
        assert_eq!("\\x22", encoder.encode("\""));
        assert_eq!("\\x27", encoder.encode("'"));
        assert_eq!("/", encoder.encode("/"));
        assert_eq!("\\x26", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_only_tests(&encoder);
    }

    #[test]
    fn t_java_script_attribute_ascii_extended() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Attribute, false);
        assert_eq!("\\x22", encoder.encode("\""));
        assert_eq!("\\x27", encoder.encode("'"));
        assert_eq!("/", encoder.encode("/"));
        assert_eq!("\\x26", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_extended_tests(&encoder);
    }
}