Skip to main content

contextual_encoder/
javascript.rs

1//! javascript contextual output encoders.
2//!
3//! provides five encoding contexts:
4//!
5//! - [`for_javascript`] — universal encoder, safe in HTML attributes, script
6//!   blocks, and standalone .js files
7//! - [`for_javascript_attribute`] — optimized for HTML event attributes
8//!   (e.g., `onclick="..."`)
9//! - [`for_javascript_block`] — optimized for `<script>` blocks
//! - [`for_javascript_source`] — optimized for standalone .js files (the
//!   output is not strict JSON: it may contain `\'` and `\xHH` escapes)
11//! - [`for_js_template`] — for ES6 template literal content (`` `...` ``)
12//!
13//! # security notes
14//!
15//! - the string literal encoders ([`for_javascript`], [`for_javascript_attribute`],
16//!   [`for_javascript_block`], [`for_javascript_source`]) do **not** encode the
17//!   grave accent (`` ` ``). do not use them to embed data inside template
18//!   literals — use [`for_js_template`] instead.
19//! - these encoders are for string/template literal contexts only. they cannot
20//!   make arbitrary javascript expressions, variable names, or property
21//!   accessors safe.
22//! - `for_javascript_block` and `for_javascript_source` use backslash escapes
23//!   for quotes (`\"`, `\'`) which are **not safe in HTML attribute contexts**.
24//! - `for_javascript_attribute` does not escape `/` and is **not safe in
25//!   `<script>` blocks** where `</script>` could appear.
26
27use std::fmt;
28
29use crate::engine::encode_loop;
30
/// per-context switches for the string literal encoders.
///
/// the string literal encoders differ only in how they treat quotes, `&`,
/// and `/`; each flag below selects one of those differences.
#[derive(Copy, Clone)]
struct JsConfig {
    /// when set, quotes are written as hex escapes (`"` → `\x22`,
    /// `'` → `\x27`), which is safe inside HTML attribute values.
    /// when clear, the more readable backslash forms (`\"`, `\'`) are
    /// used; those are not safe in HTML attributes.
    hex_quotes: bool,
    /// when set, `&` is written as `\x26` so it cannot be interpreted as
    /// the start of an HTML entity.
    encode_ampersand: bool,
    /// when set, `/` is written as `\/` so `</script>` cannot appear in
    /// the output.
    encode_slash: bool,
}
42
43const JS_UNIVERSAL: JsConfig = JsConfig {
44    hex_quotes: true,
45    encode_ampersand: true,
46    encode_slash: true,
47};
48
49const JS_ATTRIBUTE: JsConfig = JsConfig {
50    hex_quotes: true,
51    encode_ampersand: true,
52    encode_slash: false,
53};
54
55const JS_BLOCK: JsConfig = JsConfig {
56    hex_quotes: false,
57    encode_ampersand: true,
58    encode_slash: true,
59};
60
61const JS_SOURCE: JsConfig = JsConfig {
62    hex_quotes: false,
63    encode_ampersand: false,
64    encode_slash: false,
65};
66
67// ---------------------------------------------------------------------------
68// for_javascript — universal encoder (safe everywhere)
69// ---------------------------------------------------------------------------
70
71/// encodes `input` for safe embedding in a javascript string literal.
72///
73/// this is the universal javascript encoder — its output is safe in HTML
74/// event attributes, `<script>` blocks, and standalone .js files. it is
75/// slightly more conservative than the context-specific encoders.
76///
77/// # encoding rules
78///
79/// - C0 controls → named escapes (`\b`, `\t`, `\n`, `\f`, `\r`) or hex
80///   (`\xHH`)
81/// - `"` → `\x22`, `'` → `\x27` (hex escapes for HTML attribute safety)
82/// - `&` → `\x26` (prevents HTML entity interpretation)
83/// - `/` → `\/` (prevents `</script>` injection)
84/// - `\` → `\\`
85/// - U+2028 → `\u2028`, U+2029 → `\u2029` (javascript line terminators)
86///
87/// # caveat: template literals
88///
89/// this encoder does **not** encode the grave accent (`` ` ``). never
90/// embed untrusted data directly inside template literals. instead:
91///
92/// ```js
93/// // WRONG — vulnerable to XSS:
94/// // `Hello ${unsafeInput}`
95/// //
96/// // RIGHT — encode into a variable first:
97/// // var x = '<encoded>';
98/// // `Hello ${x}`
99/// ```
100///
101/// # examples
102///
103/// ```
104/// use contextual_encoder::for_javascript;
105///
106/// assert_eq!(for_javascript(r#"it's "unsafe" </script>"#),
107///            r"it\x27s \x22unsafe\x22 <\/script>");
108/// assert_eq!(for_javascript("safe"), "safe");
109/// ```
110pub fn for_javascript(input: &str) -> String {
111    encode_js(input, &JS_UNIVERSAL)
112}
113
114/// writes the javascript-encoded form of `input` to `out`.
115///
116/// see [`for_javascript`] for encoding rules.
117pub fn write_javascript<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
118    write_js(out, input, &JS_UNIVERSAL)
119}
120
121// ---------------------------------------------------------------------------
122// for_javascript_attribute — optimized for HTML event attributes
123// ---------------------------------------------------------------------------
124
125/// encodes `input` for safe embedding in a javascript string literal inside
126/// an HTML event attribute (e.g., `onclick="..."`).
127///
128/// identical to [`for_javascript`] except `/` is **not** escaped (not
129/// needed in event attributes where `</script>` is not a concern).
130///
131/// **not safe in `<script>` blocks** — use [`for_javascript`] or
132/// [`for_javascript_block`] instead.
133///
134/// # examples
135///
136/// ```
137/// use contextual_encoder::for_javascript_attribute;
138///
139/// assert_eq!(for_javascript_attribute("a/b"), "a/b");
140/// assert_eq!(for_javascript_attribute("a'b"), r"a\x27b");
141/// ```
142pub fn for_javascript_attribute(input: &str) -> String {
143    encode_js(input, &JS_ATTRIBUTE)
144}
145
146/// writes the javascript-attribute-encoded form of `input` to `out`.
147///
148/// see [`for_javascript_attribute`] for encoding rules.
149pub fn write_javascript_attribute<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
150    write_js(out, input, &JS_ATTRIBUTE)
151}
152
153// ---------------------------------------------------------------------------
154// for_javascript_block — optimized for <script> blocks
155// ---------------------------------------------------------------------------
156
157/// encodes `input` for safe embedding in a javascript string literal inside
158/// an HTML `<script>` block.
159///
160/// uses backslash escapes for quotes (`\"`, `\'`) which are more readable
161/// but **not safe in HTML attribute contexts**. still encodes `&` (for XHTML
162/// compatibility) and `/` (to prevent `</script>` injection).
163///
164/// # examples
165///
166/// ```
167/// use contextual_encoder::for_javascript_block;
168///
169/// assert_eq!(for_javascript_block(r#"he said "hi""#), r#"he said \"hi\""#);
170/// assert_eq!(for_javascript_block("</script>"), r"<\/script>");
171/// ```
172pub fn for_javascript_block(input: &str) -> String {
173    encode_js(input, &JS_BLOCK)
174}
175
176/// writes the javascript-block-encoded form of `input` to `out`.
177///
178/// see [`for_javascript_block`] for encoding rules.
179pub fn write_javascript_block<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
180    write_js(out, input, &JS_BLOCK)
181}
182
183// ---------------------------------------------------------------------------
184// for_javascript_source — optimized for standalone .js files
185// ---------------------------------------------------------------------------
186
187/// encodes `input` for safe embedding in a javascript string literal in a
188/// standalone .js or JSON file.
189///
190/// the most minimal javascript encoder — does not encode `/` or `&` since
191/// there is no HTML context. **not safe for any HTML-embedded context.**
192///
193/// # examples
194///
195/// ```
196/// use contextual_encoder::for_javascript_source;
197///
198/// assert_eq!(for_javascript_source("a/b&c"), "a/b&c");
199/// assert_eq!(for_javascript_source("line\nbreak"), r"line\nbreak");
200/// ```
201pub fn for_javascript_source(input: &str) -> String {
202    encode_js(input, &JS_SOURCE)
203}
204
205/// writes the javascript-source-encoded form of `input` to `out`.
206///
207/// see [`for_javascript_source`] for encoding rules.
208pub fn write_javascript_source<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
209    write_js(out, input, &JS_SOURCE)
210}
211
212// ---------------------------------------------------------------------------
213// for_js_template — ES6 template literal encoder
214// ---------------------------------------------------------------------------
215
216/// encodes `input` for safe embedding inside an ES6 template literal
217/// (`` `...` ``).
218///
219/// template literals use backticks as delimiters and `${...}` for
220/// interpolation. this encoder escapes both so untrusted data cannot break
221/// out of the literal or inject expressions.
222///
223/// # encoding rules
224///
225/// - `` ` `` → `` \` `` (prevents breaking out of the template literal)
226/// - `$` followed by `{` → `\${` (prevents expression interpolation)
227/// - `\` → `\\`
228/// - `/` → `\/` (prevents `</script>` injection)
229/// - C0 controls → named escapes (`\b`, `\t`, `\n`, `\f`, `\r`) or hex
230///   (`\xHH`)
231/// - U+2028 → `\u2028`, U+2029 → `\u2029` (line/paragraph separators)
232///
233/// unlike the string literal encoders, this does **not** escape `"` or `'`
234/// (they are ordinary characters inside template literals).
235///
236/// # examples
237///
238/// ```
239/// use contextual_encoder::for_js_template;
240///
241/// assert_eq!(for_js_template("hello `world`"), r"hello \`world\`");
242/// assert_eq!(for_js_template("${alert(1)}"), r"\${alert(1)}");
243/// assert_eq!(for_js_template("safe"), "safe");
244/// assert_eq!(for_js_template("a $ b"), "a $ b");
245/// ```
246pub fn for_js_template(input: &str) -> String {
247    let mut out = String::with_capacity(input.len());
248    write_js_template(&mut out, input).expect("writing to string cannot fail");
249    out
250}
251
252/// writes the template-literal-encoded form of `input` to `out`.
253///
254/// see [`for_js_template`] for encoding rules.
255pub fn write_js_template<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
256    encode_loop(
257        out,
258        input,
259        needs_js_template_encoding,
260        write_js_template_encoded,
261    )
262}
263
/// reports whether `c` must be escaped inside a template literal.
///
/// `$` is always flagged here; whether it is actually escaped is decided
/// later, once the following character is known.
fn needs_js_template_encoding(c: char) -> bool {
    let is_c0_control = c <= '\x1F';
    let is_template_syntax = matches!(c, '\\' | '`' | '$' | '/');
    let is_line_separator = matches!(c, '\u{2028}' | '\u{2029}');
    is_c0_control || is_template_syntax || is_line_separator
}
270
/// writes the template-literal escape sequence for `c` to `out`.
///
/// meant for characters flagged by `needs_js_template_encoding`. `next` is
/// the lookahead handed over by `encode_loop` (presumably the character that
/// follows `c` in the input — confirm against `encode_loop`). it is only
/// consulted for `$`, which becomes `\$` when `next` is `{` and is passed
/// through untouched otherwise.
fn write_js_template_encoded<W: fmt::Write>(
    out: &mut W,
    c: char,
    next: Option<char>,
) -> fmt::Result {
    let escape = match c {
        '\x08' => "\\b",
        '\t' => "\\t",
        '\n' => "\\n",
        '\x0B' => "\\x0b",
        '\x0C' => "\\f",
        '\r' => "\\r",
        '`' => "\\`",
        '$' if next == Some('{') => "\\$",
        // a `$` that does not open `${` is harmless; emit it unchanged.
        '$' => return out.write_char('$'),
        '/' => "\\/",
        '\\' => "\\\\",
        '\u{2028}' => "\\u2028",
        '\u{2029}' => "\\u2029",
        // remaining C0 controls have no named escape; use two-digit hex.
        _ => return write!(out, "\\x{:02x}", u32::from(c)),
    };
    out.write_str(escape)
}
294
295// ---------------------------------------------------------------------------
296// shared implementation (string literal encoders)
297// ---------------------------------------------------------------------------
298
299fn encode_js(input: &str, config: &JsConfig) -> String {
300    let mut out = String::with_capacity(input.len());
301    write_js(&mut out, input, config).expect("writing to string cannot fail");
302    out
303}
304
305fn write_js<W: fmt::Write>(out: &mut W, input: &str, config: &JsConfig) -> fmt::Result {
306    encode_loop(
307        out,
308        input,
309        |c| needs_js_encoding(c, config),
310        |out, c, _next| write_js_encoded(out, c, config),
311    )
312}
313
314fn needs_js_encoding(c: char, config: &JsConfig) -> bool {
315    match c {
316        '\x00'..='\x1F' | '\\' | '"' | '\'' | '\u{2028}' | '\u{2029}' => true,
317        '&' => config.encode_ampersand,
318        '/' => config.encode_slash,
319        _ => false,
320    }
321}
322
323fn write_js_encoded<W: fmt::Write>(out: &mut W, c: char, config: &JsConfig) -> fmt::Result {
324    match c {
325        '\x08' => out.write_str("\\b"),
326        '\t' => out.write_str("\\t"),
327        '\n' => out.write_str("\\n"),
328        '\x0B' => out.write_str("\\x0b"),
329        '\x0C' => out.write_str("\\f"),
330        '\r' => out.write_str("\\r"),
331        '"' if config.hex_quotes => out.write_str("\\x22"),
332        '"' => out.write_str("\\\""),
333        '\'' if config.hex_quotes => out.write_str("\\x27"),
334        '\'' => out.write_str("\\'"),
335        '&' => out.write_str("\\x26"),
336        '/' => out.write_str("\\/"),
337        '\\' => out.write_str("\\\\"),
338        '\u{2028}' => out.write_str("\\u2028"),
339        '\u{2029}' => out.write_str("\\u2029"),
340        // other C0 controls
341        c => write!(out, "\\x{:02x}", c as u32),
342    }
343}
344
#[cfg(test)]
mod tests {
    use super::*;

    /// signature shared by every `for_*` encoder in this module.
    type Encoder = fn(&str) -> String;

    /// C0 control inputs paired with the escape expected for each.
    const CONTROL_CASES: &[(&str, &str)] = &[
        ("\x00", r"\x00"),
        ("\x08", r"\b"),
        ("\t", r"\t"),
        ("\n", r"\n"),
        ("\x0B", r"\x0b"),
        ("\x0C", r"\f"),
        ("\r", r"\r"),
        ("\x1F", r"\x1f"),
    ];

    /// U+2028 / U+2029 paired with their `\uXXXX` escapes.
    const LINE_SEPARATOR_CASES: &[(&str, &str)] =
        &[("\u{2028}", r"\u2028"), ("\u{2029}", r"\u2029")];

    /// checks that `encode` maps every `(input, expected)` pair as listed.
    fn assert_encodes(encode: Encoder, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(encode(input), expected, "input: {:?}", input);
        }
    }

    // -- for_javascript (universal) --

    #[test]
    fn js_no_encoding_needed() {
        assert_encodes(for_javascript, &[("hello world", "hello world"), ("", "")]);
    }

    #[test]
    fn js_encodes_quotes_as_hex() {
        assert_encodes(
            for_javascript,
            &[(r#"a"b"#, r"a\x22b"), ("a'b", r"a\x27b")],
        );
    }

    #[test]
    fn js_encodes_backslash() {
        assert_encodes(for_javascript, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn js_encodes_ampersand() {
        assert_encodes(for_javascript, &[("a&b", r"a\x26b")]);
    }

    #[test]
    fn js_encodes_slash() {
        assert_encodes(for_javascript, &[("</script>", r"<\/script>")]);
    }

    #[test]
    fn js_encodes_control_chars() {
        assert_encodes(for_javascript, CONTROL_CASES);
    }

    #[test]
    fn js_encodes_line_separators() {
        assert_encodes(for_javascript, LINE_SEPARATOR_CASES);
    }

    #[test]
    fn js_preserves_non_ascii() {
        assert_encodes(for_javascript, &[("café", "café"), ("日本語", "日本語")]);
    }

    #[test]
    fn js_writer_variant() {
        let mut written = String::new();
        write_javascript(&mut written, "a'b").unwrap();
        assert_eq!(written, r"a\x27b");
    }

    // -- for_javascript_attribute --

    #[test]
    fn js_attr_does_not_encode_slash() {
        assert_encodes(for_javascript_attribute, &[("a/b", "a/b")]);
    }

    #[test]
    fn js_attr_encodes_quotes_as_hex() {
        assert_encodes(for_javascript_attribute, &[("a'b", r"a\x27b")]);
    }

    #[test]
    fn js_attr_encodes_ampersand() {
        assert_encodes(for_javascript_attribute, &[("a&b", r"a\x26b")]);
    }

    // -- for_javascript_block --

    #[test]
    fn js_block_uses_backslash_quotes() {
        assert_encodes(
            for_javascript_block,
            &[(r#"a"b"#, r#"a\"b"#), ("a'b", r"a\'b")],
        );
    }

    #[test]
    fn js_block_encodes_slash() {
        assert_encodes(for_javascript_block, &[("a/b", r"a\/b")]);
    }

    #[test]
    fn js_block_encodes_ampersand() {
        assert_encodes(for_javascript_block, &[("a&b", r"a\x26b")]);
    }

    // -- for_javascript_source --

    #[test]
    fn js_source_uses_backslash_quotes() {
        assert_encodes(
            for_javascript_source,
            &[(r#"a"b"#, r#"a\"b"#), ("a'b", r"a\'b")],
        );
    }

    #[test]
    fn js_source_does_not_encode_slash_or_ampersand() {
        assert_encodes(for_javascript_source, &[("a/b&c", "a/b&c")]);
    }

    #[test]
    fn js_source_encodes_line_separators() {
        assert_encodes(for_javascript_source, &[("\u{2028}", r"\u2028")]);
    }

    // -- for_js_template --

    #[test]
    fn js_template_no_encoding_needed() {
        assert_encodes(for_js_template, &[("hello world", "hello world"), ("", "")]);
    }

    #[test]
    fn js_template_encodes_backtick() {
        assert_encodes(
            for_js_template,
            &[("hello `world`", r"hello \`world\`"), ("`", r"\`")],
        );
    }

    #[test]
    fn js_template_encodes_interpolation() {
        assert_encodes(
            for_js_template,
            &[
                ("${alert(1)}", r"\${alert(1)}"),
                ("a${b}c", r"a\${b}c"),
                ("${a}${b}", r"\${a}\${b}"),
            ],
        );
    }

    #[test]
    fn js_template_dollar_without_brace_passes_through() {
        assert_encodes(
            for_js_template,
            &[("a $ b", "a $ b"), ("$100", "$100"), ("a$", "a$")],
        );
    }

    #[test]
    fn js_template_encodes_backslash() {
        assert_encodes(for_js_template, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn js_template_encodes_slash() {
        assert_encodes(for_js_template, &[("</script>", r"<\/script>")]);
    }

    #[test]
    fn js_template_does_not_encode_quotes() {
        assert_encodes(
            for_js_template,
            &[(r#"a"b"#, r#"a"b"#), ("a'b", "a'b")],
        );
    }

    #[test]
    fn js_template_encodes_control_chars() {
        assert_encodes(for_js_template, CONTROL_CASES);
    }

    #[test]
    fn js_template_encodes_line_separators() {
        assert_encodes(for_js_template, LINE_SEPARATOR_CASES);
    }

    #[test]
    fn js_template_preserves_non_ascii() {
        assert_encodes(
            for_js_template,
            &[("café", "café"), ("日本語", "日本語"), ("😀", "😀")],
        );
    }

    #[test]
    fn js_template_mixed_input() {
        assert_encodes(
            for_js_template,
            &[(
                "`Hello ${name}`, welcome\\n",
                r"\`Hello \${name}\`, welcome\\n",
            )],
        );
    }

    #[test]
    fn js_template_writer_variant() {
        let input = "`test` ${x} café";
        let via_string = for_js_template(input);
        let mut via_writer = String::new();
        write_js_template(&mut via_writer, input).unwrap();
        assert_eq!(via_string, via_writer);
    }
}