Skip to main content

contextual_encoder/
javascript.rs

1//! javascript contextual output encoders.
2//!
3//! provides four encoding contexts:
4//!
5//! - [`for_javascript`] — universal encoder, safe in HTML attributes, script
6//!   blocks, and standalone .js files
7//! - [`for_javascript_attribute`] — optimized for HTML event attributes
8//!   (e.g., `onclick="..."`)
9//! - [`for_javascript_block`] — optimized for `<script>` blocks
//! - [`for_javascript_source`] — optimized for standalone .js files (its
//!   `\'` and `\xHH` escapes are not valid in strict JSON)
11//!
12//! # security notes
13//!
14//! - none of these encoders encode the grave accent (`` ` ``). **never embed
15//!   untrusted data directly inside ES2015+ template literals.** instead,
16//!   encode the data into a regular javascript string variable, then reference
17//!   that variable from the template literal.
18//! - these encoders are for string literal contexts only. they cannot make
19//!   arbitrary javascript expressions, variable names, or property accessors
20//!   safe.
21//! - `for_javascript_block` and `for_javascript_source` use backslash escapes
22//!   for quotes (`\"`, `\'`) which are **not safe in HTML attribute contexts**.
23//! - `for_javascript_attribute` does not escape `/` and is **not safe in
24//!   `<script>` blocks** where `</script>` could appear.
25
26use std::fmt;
27
28use crate::engine::encode_loop;
29
/// per-context switches that distinguish the javascript encoders.
///
/// all encoders share one implementation; these flags select how quotes are
/// written and whether `&` and `/` are escaped at all.
#[derive(Copy, Clone)]
struct JsConfig {
    /// quote style. when set, `"` and `'` become the hex escapes `\x22` and
    /// `\x27`, which are safe inside HTML attributes. when clear, the more
    /// readable `\"` and `\'` are used (not HTML-attribute safe).
    hex_quotes: bool,
    /// when set, `&` becomes `\x26` so it cannot be interpreted as the start
    /// of an HTML entity.
    encode_ampersand: bool,
    /// when set, `/` becomes `\/` so the output cannot contain `</script>`.
    encode_slash: bool,
}
41
42const JS_UNIVERSAL: JsConfig = JsConfig {
43    hex_quotes: true,
44    encode_ampersand: true,
45    encode_slash: true,
46};
47
48const JS_ATTRIBUTE: JsConfig = JsConfig {
49    hex_quotes: true,
50    encode_ampersand: true,
51    encode_slash: false,
52};
53
54const JS_BLOCK: JsConfig = JsConfig {
55    hex_quotes: false,
56    encode_ampersand: true,
57    encode_slash: true,
58};
59
60const JS_SOURCE: JsConfig = JsConfig {
61    hex_quotes: false,
62    encode_ampersand: false,
63    encode_slash: false,
64};
65
66// ---------------------------------------------------------------------------
67// for_javascript — universal encoder (safe everywhere)
68// ---------------------------------------------------------------------------
69
70/// encodes `input` for safe embedding in a javascript string literal.
71///
72/// this is the universal javascript encoder — its output is safe in HTML
73/// event attributes, `<script>` blocks, and standalone .js files. it is
74/// slightly more conservative than the context-specific encoders.
75///
76/// # encoding rules
77///
78/// - C0 controls → named escapes (`\b`, `\t`, `\n`, `\f`, `\r`) or hex
79///   (`\xHH`)
80/// - `"` → `\x22`, `'` → `\x27` (hex escapes for HTML attribute safety)
81/// - `&` → `\x26` (prevents HTML entity interpretation)
82/// - `/` → `\/` (prevents `</script>` injection)
83/// - `\` → `\\`
84/// - U+2028 → `\u2028`, U+2029 → `\u2029` (javascript line terminators)
85///
86/// # caveat: template literals
87///
88/// this encoder does **not** encode the grave accent (`` ` ``). never
89/// embed untrusted data directly inside template literals. instead:
90///
91/// ```js
92/// // WRONG — vulnerable to XSS:
93/// // `Hello ${unsafeInput}`
94/// //
95/// // RIGHT — encode into a variable first:
96/// // var x = '<encoded>';
97/// // `Hello ${x}`
98/// ```
99///
100/// # examples
101///
102/// ```
103/// use contextual_encoder::for_javascript;
104///
105/// assert_eq!(for_javascript(r#"it's "unsafe" </script>"#),
106///            r"it\x27s \x22unsafe\x22 <\/script>");
107/// assert_eq!(for_javascript("safe"), "safe");
108/// ```
109pub fn for_javascript(input: &str) -> String {
110    encode_js(input, &JS_UNIVERSAL)
111}
112
113/// writes the javascript-encoded form of `input` to `out`.
114///
115/// see [`for_javascript`] for encoding rules.
116pub fn write_javascript<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
117    write_js(out, input, &JS_UNIVERSAL)
118}
119
120// ---------------------------------------------------------------------------
121// for_javascript_attribute — optimized for HTML event attributes
122// ---------------------------------------------------------------------------
123
124/// encodes `input` for safe embedding in a javascript string literal inside
125/// an HTML event attribute (e.g., `onclick="..."`).
126///
127/// identical to [`for_javascript`] except `/` is **not** escaped (not
128/// needed in event attributes where `</script>` is not a concern).
129///
130/// **not safe in `<script>` blocks** — use [`for_javascript`] or
131/// [`for_javascript_block`] instead.
132///
133/// # examples
134///
135/// ```
136/// use contextual_encoder::for_javascript_attribute;
137///
138/// assert_eq!(for_javascript_attribute("a/b"), "a/b");
139/// assert_eq!(for_javascript_attribute("a'b"), r"a\x27b");
140/// ```
141pub fn for_javascript_attribute(input: &str) -> String {
142    encode_js(input, &JS_ATTRIBUTE)
143}
144
145/// writes the javascript-attribute-encoded form of `input` to `out`.
146///
147/// see [`for_javascript_attribute`] for encoding rules.
148pub fn write_javascript_attribute<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
149    write_js(out, input, &JS_ATTRIBUTE)
150}
151
152// ---------------------------------------------------------------------------
153// for_javascript_block — optimized for <script> blocks
154// ---------------------------------------------------------------------------
155
156/// encodes `input` for safe embedding in a javascript string literal inside
157/// an HTML `<script>` block.
158///
159/// uses backslash escapes for quotes (`\"`, `\'`) which are more readable
160/// but **not safe in HTML attribute contexts**. still encodes `&` (for XHTML
161/// compatibility) and `/` (to prevent `</script>` injection).
162///
163/// # examples
164///
165/// ```
166/// use contextual_encoder::for_javascript_block;
167///
168/// assert_eq!(for_javascript_block(r#"he said "hi""#), r#"he said \"hi\""#);
169/// assert_eq!(for_javascript_block("</script>"), r"<\/script>");
170/// ```
171pub fn for_javascript_block(input: &str) -> String {
172    encode_js(input, &JS_BLOCK)
173}
174
175/// writes the javascript-block-encoded form of `input` to `out`.
176///
177/// see [`for_javascript_block`] for encoding rules.
178pub fn write_javascript_block<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
179    write_js(out, input, &JS_BLOCK)
180}
181
182// ---------------------------------------------------------------------------
183// for_javascript_source — optimized for standalone .js files
184// ---------------------------------------------------------------------------
185
186/// encodes `input` for safe embedding in a javascript string literal in a
187/// standalone .js or JSON file.
188///
189/// the most minimal javascript encoder — does not encode `/` or `&` since
190/// there is no HTML context. **not safe for any HTML-embedded context.**
191///
192/// # examples
193///
194/// ```
195/// use contextual_encoder::for_javascript_source;
196///
197/// assert_eq!(for_javascript_source("a/b&c"), "a/b&c");
198/// assert_eq!(for_javascript_source("line\nbreak"), r"line\nbreak");
199/// ```
200pub fn for_javascript_source(input: &str) -> String {
201    encode_js(input, &JS_SOURCE)
202}
203
204/// writes the javascript-source-encoded form of `input` to `out`.
205///
206/// see [`for_javascript_source`] for encoding rules.
207pub fn write_javascript_source<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
208    write_js(out, input, &JS_SOURCE)
209}
210
211// ---------------------------------------------------------------------------
212// shared implementation
213// ---------------------------------------------------------------------------
214
215fn encode_js(input: &str, config: &JsConfig) -> String {
216    let mut out = String::with_capacity(input.len());
217    write_js(&mut out, input, config).expect("writing to string cannot fail");
218    out
219}
220
221fn write_js<W: fmt::Write>(out: &mut W, input: &str, config: &JsConfig) -> fmt::Result {
222    encode_loop(
223        out,
224        input,
225        |c| needs_js_encoding(c, config),
226        |out, c, _next| write_js_encoded(out, c, config),
227    )
228}
229
230fn needs_js_encoding(c: char, config: &JsConfig) -> bool {
231    match c {
232        '\x00'..='\x1F' | '\\' | '"' | '\'' | '\u{2028}' | '\u{2029}' => true,
233        '&' => config.encode_ampersand,
234        '/' => config.encode_slash,
235        _ => false,
236    }
237}
238
239fn write_js_encoded<W: fmt::Write>(out: &mut W, c: char, config: &JsConfig) -> fmt::Result {
240    match c {
241        '\x08' => out.write_str("\\b"),
242        '\t' => out.write_str("\\t"),
243        '\n' => out.write_str("\\n"),
244        '\x0B' => out.write_str("\\x0b"),
245        '\x0C' => out.write_str("\\f"),
246        '\r' => out.write_str("\\r"),
247        '"' if config.hex_quotes => out.write_str("\\x22"),
248        '"' => out.write_str("\\\""),
249        '\'' if config.hex_quotes => out.write_str("\\x27"),
250        '\'' => out.write_str("\\'"),
251        '&' => out.write_str("\\x26"),
252        '/' => out.write_str("\\/"),
253        '\\' => out.write_str("\\\\"),
254        '\u{2028}' => out.write_str("\\u2028"),
255        '\u{2029}' => out.write_str("\\u2029"),
256        // other C0 controls
257        c => write!(out, "\\x{:02x}", c as u32),
258    }
259}
260
#[cfg(test)]
mod tests {
    use super::*;

    /// signature shared by the four `for_javascript*` encoders.
    type Encoder = fn(&str) -> String;

    /// asserts that `encode` maps every `(input, expected)` pair correctly.
    fn assert_encodes(encode: Encoder, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(encode(input), expected);
        }
    }

    // -- for_javascript (universal) --

    #[test]
    fn js_no_encoding_needed() {
        assert_encodes(for_javascript, &[("hello world", "hello world"), ("", "")]);
    }

    #[test]
    fn js_encodes_quotes_as_hex() {
        assert_encodes(
            for_javascript,
            &[(r#"a"b"#, r"a\x22b"), ("a'b", r"a\x27b")],
        );
    }

    #[test]
    fn js_encodes_backslash() {
        assert_encodes(for_javascript, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn js_encodes_ampersand() {
        assert_encodes(for_javascript, &[("a&b", r"a\x26b")]);
    }

    #[test]
    fn js_encodes_slash() {
        assert_encodes(for_javascript, &[("</script>", r"<\/script>")]);
    }

    #[test]
    fn js_encodes_control_chars() {
        assert_encodes(
            for_javascript,
            &[
                ("\x00", r"\x00"),
                ("\x08", r"\b"),
                ("\t", r"\t"),
                ("\n", r"\n"),
                ("\x0B", r"\x0b"),
                ("\x0C", r"\f"),
                ("\r", r"\r"),
                ("\x1F", r"\x1f"),
            ],
        );
    }

    #[test]
    fn js_encodes_line_separators() {
        assert_encodes(
            for_javascript,
            &[("\u{2028}", r"\u2028"), ("\u{2029}", r"\u2029")],
        );
    }

    #[test]
    fn js_preserves_non_ascii() {
        assert_encodes(for_javascript, &[("café", "café"), ("日本語", "日本語")]);
    }

    #[test]
    fn js_writer_variant() {
        let mut sink = String::new();
        write_javascript(&mut sink, "a'b").unwrap();
        assert_eq!(sink, r"a\x27b");
    }

    // -- for_javascript_attribute --

    #[test]
    fn js_attr_does_not_encode_slash() {
        assert_encodes(for_javascript_attribute, &[("a/b", "a/b")]);
    }

    #[test]
    fn js_attr_encodes_quotes_as_hex() {
        assert_encodes(for_javascript_attribute, &[("a'b", r"a\x27b")]);
    }

    #[test]
    fn js_attr_encodes_ampersand() {
        assert_encodes(for_javascript_attribute, &[("a&b", r"a\x26b")]);
    }

    // -- for_javascript_block --

    #[test]
    fn js_block_uses_backslash_quotes() {
        assert_encodes(
            for_javascript_block,
            &[(r#"a"b"#, r#"a\"b"#), ("a'b", r"a\'b")],
        );
    }

    #[test]
    fn js_block_encodes_slash() {
        assert_encodes(for_javascript_block, &[("a/b", r"a\/b")]);
    }

    #[test]
    fn js_block_encodes_ampersand() {
        assert_encodes(for_javascript_block, &[("a&b", r"a\x26b")]);
    }

    // -- for_javascript_source --

    #[test]
    fn js_source_uses_backslash_quotes() {
        assert_encodes(
            for_javascript_source,
            &[(r#"a"b"#, r#"a\"b"#), ("a'b", r"a\'b")],
        );
    }

    #[test]
    fn js_source_does_not_encode_slash_or_ampersand() {
        assert_encodes(for_javascript_source, &[("a/b&c", "a/b&c")]);
    }

    #[test]
    fn js_source_encodes_line_separators() {
        assert_encodes(for_javascript_source, &[("\u{2028}", r"\u2028")]);
    }
}
377}