contextual_encoder/javascript.rs
1//! javascript contextual output encoders.
2//!
3//! provides four encoding contexts:
4//!
5//! - [`for_javascript`] — universal encoder, safe in HTML attributes, script
6//! blocks, and standalone .js files
7//! - [`for_javascript_attribute`] — optimized for HTML event attributes
8//! (e.g., `onclick="..."`)
9//! - [`for_javascript_block`] — optimized for `<script>` blocks
//! - [`for_javascript_source`] — optimized for standalone .js files (its `\'` and `\xHH` escapes are javascript-only; strict JSON parsers reject them)
11//!
12//! # security notes
13//!
14//! - none of these encoders encode the grave accent (`` ` ``). **never embed
15//! untrusted data directly inside ES2015+ template literals.** instead,
16//! encode the data into a regular javascript string variable, then reference
17//! that variable from the template literal.
18//! - these encoders are for string literal contexts only. they cannot make
19//! arbitrary javascript expressions, variable names, or property accessors
20//! safe.
21//! - `for_javascript_block` and `for_javascript_source` use backslash escapes
22//! for quotes (`\"`, `\'`) which are **not safe in HTML attribute contexts**.
23//! - `for_javascript_attribute` does not escape `/` and is **not safe in
24//! `<script>` blocks** where `</script>` could appear.
25
26use std::fmt;
27
28use crate::engine::encode_loop;
29
/// per-context switches that select which optional escapes are applied.
///
/// all javascript encoders in this module share one implementation; the
/// public contexts differ only in the values stored here.
#[derive(Copy, Clone)]
struct JsConfig {
    /// quote style. `true` writes `"` as `\x22` and `'` as `\x27`, which
    /// is safe inside an HTML attribute value; `false` writes the more
    /// readable `\"` and `\'`, which is not HTML-attribute safe.
    hex_quotes: bool,
    /// `true` writes `&` as `\x26` so an HTML parser cannot interpret it
    /// as the start of an entity; `false` leaves `&` as it is.
    encode_ampersand: bool,
    /// `true` writes `/` as `\/` so `</script>` cannot be injected;
    /// `false` leaves `/` as it is.
    encode_slash: bool,
}
41
42const JS_UNIVERSAL: JsConfig = JsConfig {
43 hex_quotes: true,
44 encode_ampersand: true,
45 encode_slash: true,
46};
47
48const JS_ATTRIBUTE: JsConfig = JsConfig {
49 hex_quotes: true,
50 encode_ampersand: true,
51 encode_slash: false,
52};
53
54const JS_BLOCK: JsConfig = JsConfig {
55 hex_quotes: false,
56 encode_ampersand: true,
57 encode_slash: true,
58};
59
60const JS_SOURCE: JsConfig = JsConfig {
61 hex_quotes: false,
62 encode_ampersand: false,
63 encode_slash: false,
64};
65
66// ---------------------------------------------------------------------------
67// for_javascript — universal encoder (safe everywhere)
68// ---------------------------------------------------------------------------
69
70/// encodes `input` for safe embedding in a javascript string literal.
71///
72/// this is the universal javascript encoder — its output is safe in HTML
73/// event attributes, `<script>` blocks, and standalone .js files. it is
74/// slightly more conservative than the context-specific encoders.
75///
76/// # encoding rules
77///
78/// - C0 controls → named escapes (`\b`, `\t`, `\n`, `\f`, `\r`) or hex
79/// (`\xHH`)
80/// - `"` → `\x22`, `'` → `\x27` (hex escapes for HTML attribute safety)
81/// - `&` → `\x26` (prevents HTML entity interpretation)
82/// - `/` → `\/` (prevents `</script>` injection)
83/// - `\` → `\\`
84/// - U+2028 → `\u2028`, U+2029 → `\u2029` (javascript line terminators)
85///
86/// # caveat: template literals
87///
88/// this encoder does **not** encode the grave accent (`` ` ``). never
89/// embed untrusted data directly inside template literals. instead:
90///
91/// ```js
92/// // WRONG — vulnerable to XSS:
93/// // `Hello ${unsafeInput}`
94/// //
95/// // RIGHT — encode into a variable first:
96/// // var x = '<encoded>';
97/// // `Hello ${x}`
98/// ```
99///
100/// # examples
101///
102/// ```
103/// use contextual_encoder::for_javascript;
104///
105/// assert_eq!(for_javascript(r#"it's "unsafe" </script>"#),
106/// r"it\x27s \x22unsafe\x22 <\/script>");
107/// assert_eq!(for_javascript("safe"), "safe");
108/// ```
109pub fn for_javascript(input: &str) -> String {
110 encode_js(input, &JS_UNIVERSAL)
111}
112
113/// writes the javascript-encoded form of `input` to `out`.
114///
115/// see [`for_javascript`] for encoding rules.
116pub fn write_javascript<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
117 write_js(out, input, &JS_UNIVERSAL)
118}
119
120// ---------------------------------------------------------------------------
121// for_javascript_attribute — optimized for HTML event attributes
122// ---------------------------------------------------------------------------
123
124/// encodes `input` for safe embedding in a javascript string literal inside
125/// an HTML event attribute (e.g., `onclick="..."`).
126///
127/// identical to [`for_javascript`] except `/` is **not** escaped (not
128/// needed in event attributes where `</script>` is not a concern).
129///
130/// **not safe in `<script>` blocks** — use [`for_javascript`] or
131/// [`for_javascript_block`] instead.
132///
133/// # examples
134///
135/// ```
136/// use contextual_encoder::for_javascript_attribute;
137///
138/// assert_eq!(for_javascript_attribute("a/b"), "a/b");
139/// assert_eq!(for_javascript_attribute("a'b"), r"a\x27b");
140/// ```
141pub fn for_javascript_attribute(input: &str) -> String {
142 encode_js(input, &JS_ATTRIBUTE)
143}
144
145/// writes the javascript-attribute-encoded form of `input` to `out`.
146///
147/// see [`for_javascript_attribute`] for encoding rules.
148pub fn write_javascript_attribute<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
149 write_js(out, input, &JS_ATTRIBUTE)
150}
151
152// ---------------------------------------------------------------------------
153// for_javascript_block — optimized for <script> blocks
154// ---------------------------------------------------------------------------
155
156/// encodes `input` for safe embedding in a javascript string literal inside
157/// an HTML `<script>` block.
158///
159/// uses backslash escapes for quotes (`\"`, `\'`) which are more readable
160/// but **not safe in HTML attribute contexts**. still encodes `&` (for XHTML
161/// compatibility) and `/` (to prevent `</script>` injection).
162///
163/// # examples
164///
165/// ```
166/// use contextual_encoder::for_javascript_block;
167///
168/// assert_eq!(for_javascript_block(r#"he said "hi""#), r#"he said \"hi\""#);
169/// assert_eq!(for_javascript_block("</script>"), r"<\/script>");
170/// ```
171pub fn for_javascript_block(input: &str) -> String {
172 encode_js(input, &JS_BLOCK)
173}
174
175/// writes the javascript-block-encoded form of `input` to `out`.
176///
177/// see [`for_javascript_block`] for encoding rules.
178pub fn write_javascript_block<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
179 write_js(out, input, &JS_BLOCK)
180}
181
182// ---------------------------------------------------------------------------
183// for_javascript_source — optimized for standalone .js files
184// ---------------------------------------------------------------------------
185
186/// encodes `input` for safe embedding in a javascript string literal in a
187/// standalone .js or JSON file.
188///
189/// the most minimal javascript encoder — does not encode `/` or `&` since
190/// there is no HTML context. **not safe for any HTML-embedded context.**
191///
192/// # examples
193///
194/// ```
195/// use contextual_encoder::for_javascript_source;
196///
197/// assert_eq!(for_javascript_source("a/b&c"), "a/b&c");
198/// assert_eq!(for_javascript_source("line\nbreak"), r"line\nbreak");
199/// ```
200pub fn for_javascript_source(input: &str) -> String {
201 encode_js(input, &JS_SOURCE)
202}
203
204/// writes the javascript-source-encoded form of `input` to `out`.
205///
206/// see [`for_javascript_source`] for encoding rules.
207pub fn write_javascript_source<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
208 write_js(out, input, &JS_SOURCE)
209}
210
211// ---------------------------------------------------------------------------
212// shared implementation
213// ---------------------------------------------------------------------------
214
215fn encode_js(input: &str, config: &JsConfig) -> String {
216 let mut out = String::with_capacity(input.len());
217 write_js(&mut out, input, config).expect("writing to string cannot fail");
218 out
219}
220
221fn write_js<W: fmt::Write>(out: &mut W, input: &str, config: &JsConfig) -> fmt::Result {
222 encode_loop(
223 out,
224 input,
225 |c| needs_js_encoding(c, config),
226 |out, c, _next| write_js_encoded(out, c, config),
227 )
228}
229
230fn needs_js_encoding(c: char, config: &JsConfig) -> bool {
231 match c {
232 '\x00'..='\x1F' | '\\' | '"' | '\'' | '\u{2028}' | '\u{2029}' => true,
233 '&' => config.encode_ampersand,
234 '/' => config.encode_slash,
235 _ => false,
236 }
237}
238
239fn write_js_encoded<W: fmt::Write>(out: &mut W, c: char, config: &JsConfig) -> fmt::Result {
240 match c {
241 '\x08' => out.write_str("\\b"),
242 '\t' => out.write_str("\\t"),
243 '\n' => out.write_str("\\n"),
244 '\x0B' => out.write_str("\\x0b"),
245 '\x0C' => out.write_str("\\f"),
246 '\r' => out.write_str("\\r"),
247 '"' if config.hex_quotes => out.write_str("\\x22"),
248 '"' => out.write_str("\\\""),
249 '\'' if config.hex_quotes => out.write_str("\\x27"),
250 '\'' => out.write_str("\\'"),
251 '&' => out.write_str("\\x26"),
252 '/' => out.write_str("\\/"),
253 '\\' => out.write_str("\\\\"),
254 '\u{2028}' => out.write_str("\\u2028"),
255 '\u{2029}' => out.write_str("\\u2029"),
256 // other C0 controls
257 c => write!(out, "\\x{:02x}", c as u32),
258 }
259}
260
/// unit tests for the four javascript encoders, written as input/expected
/// tables per encoder.
#[cfg(test)]
mod tests {
    use super::*;

    /// runs `encode` over every `(input, expected)` pair and asserts the
    /// result matches.
    fn check(encode: fn(&str) -> String, cases: &[(&str, &str)]) {
        for &(raw, expected) in cases {
            assert_eq!(encode(raw), expected);
        }
    }

    // -- for_javascript (universal) --

    #[test]
    fn js_no_encoding_needed() {
        check(for_javascript, &[("hello world", "hello world"), ("", "")]);
    }

    #[test]
    fn js_encodes_quotes_as_hex() {
        check(
            for_javascript,
            &[(r#"a"b"#, r"a\x22b"), ("a'b", r"a\x27b")],
        );
    }

    #[test]
    fn js_encodes_backslash() {
        check(for_javascript, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn js_encodes_ampersand() {
        check(for_javascript, &[("a&b", r"a\x26b")]);
    }

    #[test]
    fn js_encodes_slash() {
        check(for_javascript, &[("</script>", r"<\/script>")]);
    }

    #[test]
    fn js_encodes_control_chars() {
        check(
            for_javascript,
            &[
                ("\x00", r"\x00"),
                ("\x08", r"\b"),
                ("\t", r"\t"),
                ("\n", r"\n"),
                ("\x0B", r"\x0b"),
                ("\x0C", r"\f"),
                ("\r", r"\r"),
                ("\x1F", r"\x1f"),
            ],
        );
    }

    #[test]
    fn js_encodes_line_separators() {
        check(
            for_javascript,
            &[("\u{2028}", r"\u2028"), ("\u{2029}", r"\u2029")],
        );
    }

    #[test]
    fn js_preserves_non_ascii() {
        check(for_javascript, &[("café", "café"), ("日本語", "日本語")]);
    }

    #[test]
    fn js_writer_variant() {
        let mut sink = String::new();
        write_javascript(&mut sink, "a'b").unwrap();
        assert_eq!(sink, r"a\x27b");
    }

    // -- for_javascript_attribute --

    #[test]
    fn js_attr_does_not_encode_slash() {
        check(for_javascript_attribute, &[("a/b", "a/b")]);
    }

    #[test]
    fn js_attr_encodes_quotes_as_hex() {
        check(for_javascript_attribute, &[("a'b", r"a\x27b")]);
    }

    #[test]
    fn js_attr_encodes_ampersand() {
        check(for_javascript_attribute, &[("a&b", r"a\x26b")]);
    }

    // -- for_javascript_block --

    #[test]
    fn js_block_uses_backslash_quotes() {
        check(
            for_javascript_block,
            &[(r#"a"b"#, r#"a\"b"#), ("a'b", r"a\'b")],
        );
    }

    #[test]
    fn js_block_encodes_slash() {
        check(for_javascript_block, &[("a/b", r"a\/b")]);
    }

    #[test]
    fn js_block_encodes_ampersand() {
        check(for_javascript_block, &[("a&b", r"a\x26b")]);
    }

    // -- for_javascript_source --

    #[test]
    fn js_source_uses_backslash_quotes() {
        check(
            for_javascript_source,
            &[(r#"a"b"#, r#"a\"b"#), ("a'b", r"a\'b")],
        );
    }

    #[test]
    fn js_source_does_not_encode_slash_or_ampersand() {
        check(for_javascript_source, &[("a/b&c", "a/b&c")]);
    }

    #[test]
    fn js_source_encodes_line_separators() {
        check(for_javascript_source, &[("\u{2028}", r"\u2028")]);
    }
}