contextual_encoder/javascript.rs
1//! javascript contextual output encoders.
2//!
3//! provides five encoding contexts:
4//!
5//! - [`for_javascript`] — universal encoder, safe in HTML attributes, script
6//! blocks, and standalone .js files
7//! - [`for_javascript_attribute`] — optimized for HTML event attributes
8//! (e.g., `onclick="..."`)
9//! - [`for_javascript_block`] — optimized for `<script>` blocks
10//! - [`for_javascript_source`] — optimized for standalone .js / JSON files
11//! - [`for_js_template`] — for ES6 template literal content (`` `...` ``)
12//!
13//! # security notes
14//!
15//! - the string literal encoders ([`for_javascript`], [`for_javascript_attribute`],
16//! [`for_javascript_block`], [`for_javascript_source`]) do **not** encode the
17//! grave accent (`` ` ``). do not use them to embed data inside template
18//! literals — use [`for_js_template`] instead.
19//! - these encoders are for string/template literal contexts only. they cannot
20//! make arbitrary javascript expressions, variable names, or property
21//! accessors safe.
22//! - `for_javascript_block` and `for_javascript_source` use backslash escapes
23//! for quotes (`\"`, `\'`) which are **not safe in HTML attribute contexts**.
24//! - `for_javascript_attribute` does not escape `/` and is **not safe in
25//! `<script>` blocks** where `</script>` could appear.
26
27use std::fmt;
28
29use crate::engine::encode_loop;
30
/// per-context switches that distinguish the string literal encoders.
///
/// each flag either picks between two spellings of an escape or turns one
/// optional escape on; the remaining escapes are applied in every context.
#[derive(Clone, Copy)]
struct JsConfig {
    /// quote spelling. when set, `"` and `'` are written as `\x22` and
    /// `\x27`, which stay inert inside an HTML attribute value. when clear,
    /// the shorter `\"` and `\'` are used — easier to read, but not safe
    /// inside HTML attributes.
    hex_quotes: bool,
    /// when set, `&` is written as `\x26` so HTML entity decoding cannot
    /// reinterpret the output.
    encode_ampersand: bool,
    /// when set, `/` is written as `\/` so the output cannot contain a
    /// literal `</script>`.
    encode_slash: bool,
}
42
43const JS_UNIVERSAL: JsConfig = JsConfig {
44 hex_quotes: true,
45 encode_ampersand: true,
46 encode_slash: true,
47};
48
49const JS_ATTRIBUTE: JsConfig = JsConfig {
50 hex_quotes: true,
51 encode_ampersand: true,
52 encode_slash: false,
53};
54
55const JS_BLOCK: JsConfig = JsConfig {
56 hex_quotes: false,
57 encode_ampersand: true,
58 encode_slash: true,
59};
60
61const JS_SOURCE: JsConfig = JsConfig {
62 hex_quotes: false,
63 encode_ampersand: false,
64 encode_slash: false,
65};
66
67// ---------------------------------------------------------------------------
68// for_javascript — universal encoder (safe everywhere)
69// ---------------------------------------------------------------------------
70
71/// encodes `input` for safe embedding in a javascript string literal.
72///
73/// this is the universal javascript encoder — its output is safe in HTML
74/// event attributes, `<script>` blocks, and standalone .js files. it is
75/// slightly more conservative than the context-specific encoders.
76///
77/// # encoding rules
78///
79/// - C0 controls → named escapes (`\b`, `\t`, `\n`, `\f`, `\r`) or hex
80/// (`\xHH`)
81/// - `"` → `\x22`, `'` → `\x27` (hex escapes for HTML attribute safety)
82/// - `&` → `\x26` (prevents HTML entity interpretation)
83/// - `/` → `\/` (prevents `</script>` injection)
84/// - `\` → `\\`
85/// - U+2028 → `\u2028`, U+2029 → `\u2029` (javascript line terminators)
86///
87/// # caveat: template literals
88///
89/// this encoder does **not** encode the grave accent (`` ` ``). never
90/// embed untrusted data directly inside template literals. instead:
91///
92/// ```js
93/// // WRONG — vulnerable to XSS:
94/// // `Hello ${unsafeInput}`
95/// //
96/// // RIGHT — encode into a variable first:
97/// // var x = '<encoded>';
98/// // `Hello ${x}`
99/// ```
100///
101/// # examples
102///
103/// ```
104/// use contextual_encoder::for_javascript;
105///
106/// assert_eq!(for_javascript(r#"it's "unsafe" </script>"#),
107/// r"it\x27s \x22unsafe\x22 <\/script>");
108/// assert_eq!(for_javascript("safe"), "safe");
109/// ```
110pub fn for_javascript(input: &str) -> String {
111 encode_js(input, &JS_UNIVERSAL)
112}
113
114/// writes the javascript-encoded form of `input` to `out`.
115///
116/// see [`for_javascript`] for encoding rules.
117pub fn write_javascript<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
118 write_js(out, input, &JS_UNIVERSAL)
119}
120
121// ---------------------------------------------------------------------------
122// for_javascript_attribute — optimized for HTML event attributes
123// ---------------------------------------------------------------------------
124
125/// encodes `input` for safe embedding in a javascript string literal inside
126/// an HTML event attribute (e.g., `onclick="..."`).
127///
128/// identical to [`for_javascript`] except `/` is **not** escaped (not
129/// needed in event attributes where `</script>` is not a concern).
130///
131/// **not safe in `<script>` blocks** — use [`for_javascript`] or
132/// [`for_javascript_block`] instead.
133///
134/// # examples
135///
136/// ```
137/// use contextual_encoder::for_javascript_attribute;
138///
139/// assert_eq!(for_javascript_attribute("a/b"), "a/b");
140/// assert_eq!(for_javascript_attribute("a'b"), r"a\x27b");
141/// ```
142pub fn for_javascript_attribute(input: &str) -> String {
143 encode_js(input, &JS_ATTRIBUTE)
144}
145
146/// writes the javascript-attribute-encoded form of `input` to `out`.
147///
148/// see [`for_javascript_attribute`] for encoding rules.
149pub fn write_javascript_attribute<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
150 write_js(out, input, &JS_ATTRIBUTE)
151}
152
153// ---------------------------------------------------------------------------
154// for_javascript_block — optimized for <script> blocks
155// ---------------------------------------------------------------------------
156
157/// encodes `input` for safe embedding in a javascript string literal inside
158/// an HTML `<script>` block.
159///
160/// uses backslash escapes for quotes (`\"`, `\'`) which are more readable
161/// but **not safe in HTML attribute contexts**. still encodes `&` (for XHTML
162/// compatibility) and `/` (to prevent `</script>` injection).
163///
164/// # examples
165///
166/// ```
167/// use contextual_encoder::for_javascript_block;
168///
169/// assert_eq!(for_javascript_block(r#"he said "hi""#), r#"he said \"hi\""#);
170/// assert_eq!(for_javascript_block("</script>"), r"<\/script>");
171/// ```
172pub fn for_javascript_block(input: &str) -> String {
173 encode_js(input, &JS_BLOCK)
174}
175
176/// writes the javascript-block-encoded form of `input` to `out`.
177///
178/// see [`for_javascript_block`] for encoding rules.
179pub fn write_javascript_block<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
180 write_js(out, input, &JS_BLOCK)
181}
182
183// ---------------------------------------------------------------------------
184// for_javascript_source — optimized for standalone .js files
185// ---------------------------------------------------------------------------
186
187/// encodes `input` for safe embedding in a javascript string literal in a
188/// standalone .js or JSON file.
189///
190/// the most minimal javascript encoder — does not encode `/` or `&` since
191/// there is no HTML context. **not safe for any HTML-embedded context.**
192///
193/// # examples
194///
195/// ```
196/// use contextual_encoder::for_javascript_source;
197///
198/// assert_eq!(for_javascript_source("a/b&c"), "a/b&c");
199/// assert_eq!(for_javascript_source("line\nbreak"), r"line\nbreak");
200/// ```
201pub fn for_javascript_source(input: &str) -> String {
202 encode_js(input, &JS_SOURCE)
203}
204
205/// writes the javascript-source-encoded form of `input` to `out`.
206///
207/// see [`for_javascript_source`] for encoding rules.
208pub fn write_javascript_source<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
209 write_js(out, input, &JS_SOURCE)
210}
211
212// ---------------------------------------------------------------------------
213// for_js_template — ES6 template literal encoder
214// ---------------------------------------------------------------------------
215
216/// encodes `input` for safe embedding inside an ES6 template literal
217/// (`` `...` ``).
218///
219/// template literals use backticks as delimiters and `${...}` for
220/// interpolation. this encoder escapes both so untrusted data cannot break
221/// out of the literal or inject expressions.
222///
223/// # encoding rules
224///
225/// - `` ` `` → `` \` `` (prevents breaking out of the template literal)
226/// - `$` followed by `{` → `\${` (prevents expression interpolation)
227/// - `\` → `\\`
228/// - `/` → `\/` (prevents `</script>` injection)
229/// - C0 controls → named escapes (`\b`, `\t`, `\n`, `\f`, `\r`) or hex
230/// (`\xHH`)
231/// - U+2028 → `\u2028`, U+2029 → `\u2029` (line/paragraph separators)
232///
233/// unlike the string literal encoders, this does **not** escape `"` or `'`
234/// (they are ordinary characters inside template literals).
235///
236/// # examples
237///
238/// ```
239/// use contextual_encoder::for_js_template;
240///
241/// assert_eq!(for_js_template("hello `world`"), r"hello \`world\`");
242/// assert_eq!(for_js_template("${alert(1)}"), r"\${alert(1)}");
243/// assert_eq!(for_js_template("safe"), "safe");
244/// assert_eq!(for_js_template("a $ b"), "a $ b");
245/// ```
246pub fn for_js_template(input: &str) -> String {
247 let mut out = String::with_capacity(input.len());
248 write_js_template(&mut out, input).expect("writing to string cannot fail");
249 out
250}
251
252/// writes the template-literal-encoded form of `input` to `out`.
253///
254/// see [`for_js_template`] for encoding rules.
255pub fn write_js_template<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
256 encode_loop(
257 out,
258 input,
259 needs_js_template_encoding,
260 write_js_template_encoded,
261 )
262}
263
/// reports whether `c` has to be rewritten before it may appear inside a
/// template literal.
///
/// flagged characters:
///
/// - C0 controls (U+0000..=U+001F), U+2028 and U+2029
/// - `\` and `` ` ``
/// - `$` — always flagged, although only `${` is dangerous. this predicate
///   sees one character at a time, so the look-ahead decision is made in
///   [`write_js_template_encoded`].
/// - `/` — so `</script>` cannot appear in the output
/// - `-` — so `<!--` cannot appear in the output. inside a `<script>`
///   element, `<!--` followed by `<script` switches the HTML tokenizer into
///   its "double escaped" state, after which the next `</script>` no longer
///   closes the element. escaping `/` alone does not prevent that.
fn needs_js_template_encoding(c: char) -> bool {
    matches!(
        c,
        '\x00'..='\x1F' | '\\' | '`' | '$' | '/' | '-' | '\u{2028}' | '\u{2029}'
    )
}

/// writes the template-literal escape for `c` to `out`.
///
/// `c` must be a character flagged by [`needs_js_template_encoding`]. the
/// final arm formats the code point as `\xHH`, which is only a valid escape
/// for code points up to U+00FF; here it is reached only by the C0 controls
/// that have no named escape.
///
/// `next` is the character that follows `c` in the input, if any. it is
/// consulted only for `$`, which is escaped when it would otherwise start a
/// `${` interpolation and copied verbatim in every other position.
///
/// returns the result of the underlying write to `out`.
fn write_js_template_encoded<W: fmt::Write>(
    out: &mut W,
    c: char,
    next: Option<char>,
) -> fmt::Result {
    match c {
        '\x08' => out.write_str("\\b"),
        '\t' => out.write_str("\\t"),
        '\n' => out.write_str("\\n"),
        // spelled as hex rather than `\v`
        '\x0B' => out.write_str("\\x0b"),
        '\x0C' => out.write_str("\\f"),
        '\r' => out.write_str("\\r"),
        '`' => out.write_str("\\`"),
        '$' if next == Some('{') => out.write_str("\\$"),
        '$' => out.write_char('$'),
        '/' => out.write_str("\\/"),
        // `\-` is an identity escape (evaluates to `-`); it keeps `<!--`
        // out of the output so a surrounding <script> element cannot be
        // pushed into the double-escaped tokenizer state
        '-' => out.write_str("\\-"),
        '\\' => out.write_str("\\\\"),
        '\u{2028}' => out.write_str("\\u2028"),
        '\u{2029}' => out.write_str("\\u2029"),
        // other C0 controls
        c => write!(out, "\\x{:02x}", c as u32),
    }
}
294
295// ---------------------------------------------------------------------------
296// shared implementation (string literal encoders)
297// ---------------------------------------------------------------------------
298
299fn encode_js(input: &str, config: &JsConfig) -> String {
300 let mut out = String::with_capacity(input.len());
301 write_js(&mut out, input, config).expect("writing to string cannot fail");
302 out
303}
304
305fn write_js<W: fmt::Write>(out: &mut W, input: &str, config: &JsConfig) -> fmt::Result {
306 encode_loop(
307 out,
308 input,
309 |c| needs_js_encoding(c, config),
310 |out, c, _next| write_js_encoded(out, c, config),
311 )
312}
313
314fn needs_js_encoding(c: char, config: &JsConfig) -> bool {
315 match c {
316 '\x00'..='\x1F' | '\\' | '"' | '\'' | '\u{2028}' | '\u{2029}' => true,
317 '&' => config.encode_ampersand,
318 '/' => config.encode_slash,
319 _ => false,
320 }
321}
322
323fn write_js_encoded<W: fmt::Write>(out: &mut W, c: char, config: &JsConfig) -> fmt::Result {
324 match c {
325 '\x08' => out.write_str("\\b"),
326 '\t' => out.write_str("\\t"),
327 '\n' => out.write_str("\\n"),
328 '\x0B' => out.write_str("\\x0b"),
329 '\x0C' => out.write_str("\\f"),
330 '\r' => out.write_str("\\r"),
331 '"' if config.hex_quotes => out.write_str("\\x22"),
332 '"' => out.write_str("\\\""),
333 '\'' if config.hex_quotes => out.write_str("\\x27"),
334 '\'' => out.write_str("\\'"),
335 '&' => out.write_str("\\x26"),
336 '/' => out.write_str("\\/"),
337 '\\' => out.write_str("\\\\"),
338 '\u{2028}' => out.write_str("\\u2028"),
339 '\u{2029}' => out.write_str("\\u2029"),
340 // other C0 controls
341 c => write!(out, "\\x{:02x}", c as u32),
342 }
343}
344
#[cfg(test)]
mod tests {
    use super::*;

    /// runs `encode` over every `(input, expected)` pair and fails on the
    /// first mismatch, naming the offending input.
    fn check(encode: fn(&str) -> String, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(encode(input), expected, "input: {:?}", input);
        }
    }

    // -- for_javascript (universal) --

    #[test]
    fn js_no_encoding_needed() {
        check(for_javascript, &[("hello world", "hello world"), ("", "")]);
    }

    #[test]
    fn js_encodes_quotes_as_hex() {
        check(
            for_javascript,
            &[(r#"a"b"#, r"a\x22b"), ("a'b", r"a\x27b")],
        );
    }

    #[test]
    fn js_encodes_backslash() {
        check(for_javascript, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn js_encodes_ampersand() {
        check(for_javascript, &[("a&b", r"a\x26b")]);
    }

    #[test]
    fn js_encodes_slash() {
        check(for_javascript, &[("</script>", r"<\/script>")]);
    }

    #[test]
    fn js_encodes_control_chars() {
        check(
            for_javascript,
            &[
                ("\x00", r"\x00"),
                ("\x08", r"\b"),
                ("\t", r"\t"),
                ("\n", r"\n"),
                ("\x0B", r"\x0b"),
                ("\x0C", r"\f"),
                ("\r", r"\r"),
                ("\x1F", r"\x1f"),
            ],
        );
    }

    #[test]
    fn js_encodes_line_separators() {
        check(
            for_javascript,
            &[("\u{2028}", r"\u2028"), ("\u{2029}", r"\u2029")],
        );
    }

    #[test]
    fn js_preserves_non_ascii() {
        check(for_javascript, &[("café", "café"), ("日本語", "日本語")]);
    }

    #[test]
    fn js_writer_variant() {
        let mut sink = String::new();
        write_javascript(&mut sink, "a'b").unwrap();
        assert_eq!(sink, r"a\x27b");
    }

    // -- for_javascript_attribute --

    #[test]
    fn js_attr_does_not_encode_slash() {
        check(for_javascript_attribute, &[("a/b", "a/b")]);
    }

    #[test]
    fn js_attr_encodes_quotes_as_hex() {
        check(for_javascript_attribute, &[("a'b", r"a\x27b")]);
    }

    #[test]
    fn js_attr_encodes_ampersand() {
        check(for_javascript_attribute, &[("a&b", r"a\x26b")]);
    }

    // -- for_javascript_block --

    #[test]
    fn js_block_uses_backslash_quotes() {
        check(
            for_javascript_block,
            &[(r#"a"b"#, r#"a\"b"#), ("a'b", r"a\'b")],
        );
    }

    #[test]
    fn js_block_encodes_slash() {
        check(for_javascript_block, &[("a/b", r"a\/b")]);
    }

    #[test]
    fn js_block_encodes_ampersand() {
        check(for_javascript_block, &[("a&b", r"a\x26b")]);
    }

    // -- for_javascript_source --

    #[test]
    fn js_source_uses_backslash_quotes() {
        check(
            for_javascript_source,
            &[(r#"a"b"#, r#"a\"b"#), ("a'b", r"a\'b")],
        );
    }

    #[test]
    fn js_source_does_not_encode_slash_or_ampersand() {
        check(for_javascript_source, &[("a/b&c", "a/b&c")]);
    }

    #[test]
    fn js_source_encodes_line_separators() {
        check(for_javascript_source, &[("\u{2028}", r"\u2028")]);
    }

    // -- for_js_template --

    #[test]
    fn js_template_no_encoding_needed() {
        check(for_js_template, &[("hello world", "hello world"), ("", "")]);
    }

    #[test]
    fn js_template_encodes_backtick() {
        check(
            for_js_template,
            &[("hello `world`", r"hello \`world\`"), ("`", r"\`")],
        );
    }

    #[test]
    fn js_template_encodes_interpolation() {
        check(
            for_js_template,
            &[
                ("${alert(1)}", r"\${alert(1)}"),
                ("a${b}c", r"a\${b}c"),
                ("${a}${b}", r"\${a}\${b}"),
            ],
        );
    }

    #[test]
    fn js_template_dollar_without_brace_passes_through() {
        check(
            for_js_template,
            &[("a $ b", "a $ b"), ("$100", "$100"), ("a$", "a$")],
        );
    }

    #[test]
    fn js_template_encodes_backslash() {
        check(for_js_template, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn js_template_encodes_slash() {
        check(for_js_template, &[("</script>", r"<\/script>")]);
    }

    #[test]
    fn js_template_does_not_encode_quotes() {
        check(
            for_js_template,
            &[(r#"a"b"#, r#"a"b"#), ("a'b", "a'b")],
        );
    }

    #[test]
    fn js_template_encodes_control_chars() {
        check(
            for_js_template,
            &[
                ("\x00", r"\x00"),
                ("\x08", r"\b"),
                ("\t", r"\t"),
                ("\n", r"\n"),
                ("\x0B", r"\x0b"),
                ("\x0C", r"\f"),
                ("\r", r"\r"),
                ("\x1F", r"\x1f"),
            ],
        );
    }

    #[test]
    fn js_template_encodes_line_separators() {
        check(
            for_js_template,
            &[("\u{2028}", r"\u2028"), ("\u{2029}", r"\u2029")],
        );
    }

    #[test]
    fn js_template_preserves_non_ascii() {
        check(
            for_js_template,
            &[("café", "café"), ("日本語", "日本語"), ("😀", "😀")],
        );
    }

    #[test]
    fn js_template_mixed_input() {
        check(
            for_js_template,
            &[(
                "`Hello ${name}`, welcome\\n",
                r"\`Hello \${name}\`, welcome\\n",
            )],
        );
    }

    #[test]
    fn js_template_writer_variant() {
        // the allocating and the streaming entry points must agree
        let input = "`test` ${x} café";
        let mut streamed = String::new();
        write_js_template(&mut streamed, input).unwrap();
        assert_eq!(for_js_template(input), streamed);
    }
}
547}