tiny_clean/
java_script_encoder.rs

1use crate::common::{char_bucket, char_mask, encode_as_hex_byte, encode_as_unicode, dump_masks_to_ascii};
2
/// Selects which characters [`JavaScriptEncoder`] escapes and how quotes are written.
///
/// Every mode escapes the backslash, both quote characters and ASCII control
/// characters; the variants differ only in the extras listed below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JavaScriptEncoderMode {
    /// Quotes are written as `\"` / `\'`; `/`, `-` and `&` are left untouched.
    Source,
    /// Quotes are written as `\"` / `\'`; `/` and `-` are backslash-escaped and
    /// `&` is hex-escaped.
    Block,
    /// Quotes are hex-escaped (`\x22` / `\x27`); `/` and `-` are backslash-escaped
    /// and `&` is hex-escaped.
    Html,
    /// Quotes are hex-escaped (`\x22` / `\x27`) and `&` is hex-escaped; `/` and
    /// `-` are left untouched.
    Attribute,
}
10
/// Escapes text so it can be embedded in JavaScript string content.
///
/// The configuration is fixed at construction time; encoding never mutates it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JavaScriptEncoder {
    /// When `true`, every non-ASCII character (and DEL) is escaped.
    ascii_only: bool,
    /// One bit mask per group of ASCII characters; a set bit means the
    /// character may be copied to the output unescaped.
    /// NOTE(review): bucket/bit layout is defined by `char_bucket` / `char_mask`
    /// in `crate::common` — presumably 32 characters per bucket; confirm there.
    valid_masks: [u32; 4],
    /// When `true`, `'` and `"` are written as hex escapes instead of `\'` / `\"`.
    hex_encode_quotes: bool,
}
16
17impl JavaScriptEncoder {
18    pub fn new(mode: JavaScriptEncoderMode, ascii_only: bool) -> Self {
19        let mut valid_masks = [
20            0,
21            u32::MAX & !(char_mask('\'') | char_mask('"')),
22            u32::MAX & !char_mask('\\'),
23            if ascii_only {
24                u32::MAX & !char_mask(127 as char)
25            } else {
26                u32::MAX
27            },
28        ];
29        // For BLOCK or HTML mode, also escape '/' and '-'
30        if mode == JavaScriptEncoderMode::Block || mode == JavaScriptEncoderMode::Html {
31            valid_masks[1] &= !(char_mask('/') | char_mask('-'));
32        }
33
34        // For all modes except SOURCE, escape '&'
35        if mode != JavaScriptEncoderMode::Source {
36            valid_masks[1] &= !char_mask('&');
37        }
38
39        if cfg!(debug_assertions) {
40            dump_masks_to_ascii(&valid_masks);
41        }
42
43        let hex_encode_quotes = mode == JavaScriptEncoderMode::Attribute || mode == JavaScriptEncoderMode::Html;
44        JavaScriptEncoder {
45            ascii_only,
46            valid_masks,
47            hex_encode_quotes,
48        }
49    }
50    const LINE_SEPARATOR: char = '\u{2028}';
51    const PARAGRAPH_SEPARATOR: char = '\u{2029}';
52
53    pub fn encode(&self, input: &str) -> String {
54        let starting_capacity = (u32::MAX / 2).min((input.len() * 6) as u32) as usize;
55        let mut result = String::with_capacity(starting_capacity);
56        for c in input.chars() {
57            if c as u32 <= 127 {
58                let mask_index = char_bucket(c);
59                let character_mask = char_mask(c);
60
61                if (self.valid_masks[mask_index] & character_mask) == 0 {
62                    match c {
63                        '\u{0008}' => {
64                            result.push_str("\\b");
65                            continue;
66                        }
67                        '\u{0009}' => {
68                            result.push_str("\\t");
69                            continue;
70                        }
71                        '\u{000a}' => {
72                            result.push_str("\\n");
73                            continue;
74                        }
75                        '\u{000c}' => {
76                            result.push_str("\\f");
77                            continue;
78                        }
79                        '\u{000d}' => {
80                            result.push_str("\\r");
81                            continue;
82                        }
83                        '\'' | '"' => {
84                            if self.hex_encode_quotes {
85                                encode_as_hex_byte('\\', &mut result, c);
86                                continue;
87                            } else {
88                                result.push('\\');
89                                result.push(c);
90                                continue;
91                            }
92                        }
93                        '\\' | '/' | '-' => {
94                            result.push('\\');
95                            result.push(c);
96                            continue;
97                        }
98                        _ => {
99                            encode_as_hex_byte('\\', &mut result, c);
100                            continue;
101                        }
102                    }
103                }
104            } else if self.ascii_only || c == Self::LINE_SEPARATOR || c == Self::PARAGRAPH_SEPARATOR
105            {
106                if c as u32 <= 0xFF {
107                    encode_as_hex_byte('\\', &mut result, c);
108                    continue;
109                } else {
110                    encode_as_unicode('\\', &mut result, c);
111                    continue;
112                }
113            }
114            result.push(c);
115        }
116
117        result.shrink_to_fit();
118        result
119    }
120}
121
#[cfg(test)]
mod test {
    use super::{JavaScriptEncoder, JavaScriptEncoderMode};

    /// Expectations that hold for every mode and both `ascii_only` settings.
    fn generic_tests(encoder: &JavaScriptEncoder) {
        assert_eq!("", encoder.encode(""));
        assert_eq!("\\b", encoder.encode("\u{8}"));
        assert_eq!("\\t", encoder.encode("\t"));
        assert_eq!("\\n", encoder.encode("\n"));
        assert_eq!("\\f", encoder.encode("\u{c}"));
        assert_eq!("\\r", encoder.encode("\r"));
        assert_eq!("\\\\", encoder.encode("\\"));
        assert_eq!("\\x00", encoder.encode("\u{0000}"));
        assert_eq!("\\u2028", encoder.encode("\u{2028}"));
        assert_eq!("\\u2029", encoder.encode("\u{2029}"));
        assert_eq!("abcd", encoder.encode("abcd"));
        assert_eq!("ABCD", encoder.encode("ABCD"));
    }

    /// Expectations for encoders built with `ascii_only == true`.
    fn ascii_only_tests(encoder: &JavaScriptEncoder) {
        assert_eq!("\\u1234", encoder.encode("\u{1234}"));
        assert_eq!("\\xff", encoder.encode("\u{ff}"));
        assert_eq!("\\x7f", encoder.encode("\u{7f}"));
    }

    /// Expectations for encoders built with `ascii_only == false`.
    fn ascii_extended_tests(encoder: &JavaScriptEncoder) {
        assert_eq!("\u{00ff}", encoder.encode("\u{00ff}"));
        assert_eq!("\u{1234}", encoder.encode("\u{1234}"));
        assert_eq!("\u{7f}", encoder.encode("\u{7f}"));
    }

    #[test]
    fn t_java_script_block_ascii_only() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Block, true);
        assert_eq!("\\\"", encoder.encode("\""));
        assert_eq!("\\'", encoder.encode("'"));
        assert_eq!("\\/", encoder.encode("/"));
        assert_eq!("\\-", encoder.encode("-"));
        assert_eq!("\\x26", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_only_tests(&encoder);
    }

    #[test]
    fn t_java_script_block_ascii_extended() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Block, false);
        assert_eq!("\\\"", encoder.encode("\""));
        assert_eq!("\\'", encoder.encode("'"));
        assert_eq!("\\x26", encoder.encode("&"));
        assert_eq!("\\/", encoder.encode("/"));
        assert_eq!("\\-", encoder.encode("-"));
        generic_tests(&encoder);
        ascii_extended_tests(&encoder);
    }

    #[test]
    fn t_java_script_source_ascii_only() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Source, true);
        assert_eq!("\\\"", encoder.encode("\""));
        assert_eq!("\\'", encoder.encode("'"));
        assert_eq!("/", encoder.encode("/"));
        assert_eq!("-", encoder.encode("-"));
        assert_eq!("&", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_only_tests(&encoder);
    }

    #[test]
    fn t_java_script_source_ascii_extended() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Source, false);
        assert_eq!("\\\"", encoder.encode("\""));
        assert_eq!("\\'", encoder.encode("'"));
        assert_eq!("/", encoder.encode("/"));
        assert_eq!("-", encoder.encode("-"));
        assert_eq!("&", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_extended_tests(&encoder);
    }

    #[test]
    fn t_java_script_html_ascii_only() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Html, true);
        assert_eq!("\\x22", encoder.encode("\""));
        assert_eq!("\\x27", encoder.encode("'"));
        assert_eq!("\\/", encoder.encode("/"));
        assert_eq!("\\-", encoder.encode("-"));
        assert_eq!("\\x26", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_only_tests(&encoder);
    }

    #[test]
    fn t_java_script_html_ascii_extended() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Html, false);
        assert_eq!("\\x22", encoder.encode("\""));
        assert_eq!("\\x27", encoder.encode("'"));
        assert_eq!("\\/", encoder.encode("/"));
        assert_eq!("\\-", encoder.encode("-"));
        assert_eq!("\\x26", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_extended_tests(&encoder);
    }

    #[test]
    fn t_java_script_attribute_ascii_only() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Attribute, true);
        assert_eq!("\\x22", encoder.encode("\""));
        assert_eq!("\\x27", encoder.encode("'"));
        assert_eq!("/", encoder.encode("/"));
        assert_eq!("-", encoder.encode("-"));
        assert_eq!("\\x26", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_only_tests(&encoder);
    }

    #[test]
    fn t_java_script_attribute_ascii_extended() {
        let encoder = JavaScriptEncoder::new(JavaScriptEncoderMode::Attribute, false);
        assert_eq!("\\x22", encoder.encode("\""));
        assert_eq!("\\x27", encoder.encode("'"));
        assert_eq!("/", encoder.encode("/"));
        assert_eq!("-", encoder.encode("-"));
        assert_eq!("\\x26", encoder.encode("&"));
        generic_tests(&encoder);
        ascii_extended_tests(&encoder);
    }
}