Skip to main content

contextual_encoder/
json.rs

1//! JSON string encoder.
2//!
3//! encodes untrusted strings for safe embedding in JSON string values.
4//!
5//! - [`for_json`] — safe for JSON string contexts
6//!
7//! # why not `for_javascript_source`?
8//!
9//! JSON looks like JavaScript but has two critical encoding differences:
10//!
11//! - **no `\x` escapes.** JSON only supports `\uHHHH` for unicode escapes.
12//!   the `\xHH` form that JavaScript uses for control characters is invalid JSON.
13//! - **no single-quote escaping.** `\'` is not a valid JSON escape sequence.
14//!   single quotes are ordinary characters in JSON strings.
15//!
16//! using `for_javascript_source` for JSON output produces strings that may be
17//! rejected by strict JSON parsers.
18//!
19//! # encoding rules
20//!
21//! - named escapes: `\b`, `\t`, `\n`, `\f`, `\r`, `\"`, `\\`
22//! - other C0 controls (U+0000–U+001F) → `\u00HH`
23//! - `/` → `\/` (forward slash; prevents `</script>` breakout when JSON
24//!   is embedded in HTML `<script>` blocks. RFC 8259 §7 explicitly permits
25//!   `\/` as a valid escape sequence)
26//! - U+2028 → `\u2028`, U+2029 → `\u2029` (line/paragraph separators;
27//!   mandatory because JSON is often embedded in `<script>` blocks where
28//!   these would terminate the JavaScript string literal)
29//! - all other characters pass through unchanged
30
31use std::fmt;
32
33use crate::engine::encode_loop;
34
35/// encodes `input` for safe embedding in a JSON string value.
36///
37/// produces output suitable for embedding between double quotes in a JSON
38/// document. the result conforms to [RFC 8259](https://www.rfc-editor.org/rfc/rfc8259)
39/// and additionally escapes U+2028/U+2029 for safe embedding in HTML
40/// `<script>` blocks.
41///
42/// # encoding rules
43///
44/// | input | output |
45/// |-------|--------|
46/// | `\b` (U+0008) | `\b` |
47/// | `\t` (U+0009) | `\t` |
48/// | `\n` (U+000A) | `\n` |
49/// | `\f` (U+000C) | `\f` |
50/// | `\r` (U+000D) | `\r` |
51/// | `"` | `\"` |
52/// | `\` | `\\` |
53/// | `/` | `\/` |
54/// | other C0 controls (U+0000–U+001F) | `\u00HH` |
55/// | U+2028 (line separator) | `\u2028` |
56/// | U+2029 (paragraph separator) | `\u2029` |
57/// | single quotes, `&` | unchanged |
58///
59/// # difference from JavaScript encoders
60///
61/// - single quotes are **not** escaped (JSON has no `\'` escape sequence)
62/// - control characters use `\u00HH` (JSON has no `\xHH` escape sequence)
63///
64/// # examples
65///
66/// ```
67/// use contextual_encoder::for_json;
68///
69/// assert_eq!(for_json(r#"he said "hello""#), r#"he said \"hello\""#);
70/// assert_eq!(for_json("it's fine"), "it's fine");
71/// assert_eq!(for_json("line\nbreak"), r"line\nbreak");
72/// assert_eq!(for_json("\u{2028}"), r"\u2028");
73/// ```
74pub fn for_json(input: &str) -> String {
75    let mut out = String::with_capacity(input.len());
76    write_json(&mut out, input).expect("writing to string cannot fail");
77    out
78}
79
80/// writes the JSON-encoded form of `input` to `out`.
81///
82/// see [`for_json`] for encoding rules.
83pub fn write_json<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
84    encode_loop(out, input, needs_json_encoding, write_json_encoded)
85}
86
/// reports whether `c` must be replaced by an escape sequence in JSON output.
///
/// true for the C0 controls (U+0000–U+001F), `"`, `\`, `/`, U+2028 and
/// U+2029; false for every other character.
fn needs_json_encoding(c: char) -> bool {
    match c {
        // string delimiter, escape introducer, and `</script>` guard
        '"' | '\\' | '/' => true,
        // line / paragraph separators
        '\u{2028}' | '\u{2029}' => true,
        // everything else: only the C0 control range needs escaping
        _ => c <= '\x1F',
    }
}
93
/// writes the JSON escape sequence for a single character `c` to `out`.
///
/// - characters with a short JSON escape (`\b`, `\t`, `\n`, `\f`, `\r`,
///   `\"`, `\\`, `\/`) use that short form
/// - U+2028 and U+2029 are written as `\u2028` / `\u2029`
/// - anything else is written as lowercase `\uHHHH` escapes of its UTF-16
///   code units: one escape for BMP characters (so C0 controls become
///   `\u00HH`), a surrogate pair for characters above U+FFFF
///
/// `_next` (the following character, if any) is accepted for signature
/// compatibility and is not consulted.
///
/// returns any error reported by the underlying writer.
fn write_json_encoded<W: fmt::Write>(out: &mut W, c: char, _next: Option<char>) -> fmt::Result {
    match c {
        '\x08' => out.write_str("\\b"),
        '\t' => out.write_str("\\t"),
        '\n' => out.write_str("\\n"),
        '\x0C' => out.write_str("\\f"),
        '\r' => out.write_str("\\r"),
        '"' => out.write_str("\\\""),
        '\\' => out.write_str("\\\\"),
        '/' => out.write_str("\\/"),
        '\u{2028}' => out.write_str("\\u2028"),
        '\u{2029}' => out.write_str("\\u2029"),
        // other C0 controls → \u00HH (JSON does not support \xHH).
        //
        // JSON's \u escape takes exactly four hex digits, so formatting the
        // raw scalar value would be malformed for anything above U+FFFF.
        // going through UTF-16 keeps the output valid for every `char`.
        other => {
            let mut units = [0u16; 2];
            for unit in other.encode_utf16(&mut units).iter() {
                write!(out, "\\u{:04x}", unit)?;
            }
            Ok(())
        }
    }
}
110
#[cfg(test)]
mod tests {
    use super::*;

    /// asserts that `for_json` maps each `(input, expected)` pair as given.
    fn assert_encodes(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(for_json(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn passthrough() {
        assert_encodes(&[
            ("hello world", "hello world"),
            ("", ""),
            ("café", "café"),
            ("日本語", "日本語"),
            ("😀", "😀"),
        ]);
    }

    #[test]
    fn single_quotes_not_escaped() {
        assert_encodes(&[("it's", "it's"), ("'quoted'", "'quoted'")]);
    }

    #[test]
    fn double_quotes_escaped() {
        assert_encodes(&[
            (r#"a"b"#, r#"a\"b"#),
            (r#""hello""#, r#"\"hello\""#),
        ]);
    }

    #[test]
    fn backslash() {
        assert_encodes(&[(r"a\b", r"a\\b"), (r"\\", r"\\\\")]);
    }

    #[test]
    fn named_escapes() {
        assert_encodes(&[
            ("\x08", "\\b"),
            ("\t", "\\t"),
            ("\n", "\\n"),
            ("\x0C", "\\f"),
            ("\r", "\\r"),
        ]);
    }

    #[test]
    fn control_chars_use_unicode_escapes() {
        // JSON requires \u00HH, not \xHH
        assert_encodes(&[
            ("\x00", "\\u0000"),
            ("\x01", "\\u0001"),
            ("\x07", "\\u0007"),
            ("\x0B", "\\u000b"),
            ("\x0E", "\\u000e"),
            ("\x1F", "\\u001f"),
        ]);
    }

    #[test]
    fn line_separators() {
        assert_encodes(&[
            ("\u{2028}", "\\u2028"),
            ("\u{2029}", "\\u2029"),
            ("a\u{2028}b\u{2029}c", "a\\u2028b\\u2029c"),
        ]);
    }

    #[test]
    fn forward_slash_escaped() {
        assert_encodes(&[
            ("/", "\\/"),
            ("a/b", "a\\/b"),
            ("https://example.com", "https:\\/\\/example.com"),
        ]);
    }

    #[test]
    fn ampersand_not_escaped() {
        assert_encodes(&[("a&b", "a&b")]);
    }

    #[test]
    fn script_tag_breakout_prevented() {
        // the primary reason for escaping /: prevent </script> breakout
        // when JSON is embedded in an HTML <script> block
        assert_encodes(&[
            ("</script>", "<\\/script>"),
            (
                "</script><script>alert(1)//",
                "<\\/script><script>alert(1)\\/\\/",
            ),
        ]);
    }

    #[test]
    fn mixed_input() {
        assert_encodes(&[(
            "he said \"hello\"\nnew line",
            "he said \\\"hello\\\"\\nnew line",
        )]);
    }

    #[test]
    fn writer_matches_string() {
        // the allocating and streaming entry points must agree
        let input = "test\x00\"\\\n\u{2028}café";
        let mut via_writer = String::new();
        write_json(&mut via_writer, input).unwrap();
        assert_eq!(for_json(input), via_writer);
    }

    // -- key differences from for_javascript_source --

    #[test]
    fn differs_from_js_source_on_single_quotes() {
        // JS source escapes single quotes; JSON does not
        let json = for_json("a'b");
        assert_eq!(json, "a'b");
        assert_ne!(json, crate::for_javascript_source("a'b"));
    }

    #[test]
    fn differs_from_js_source_on_control_format() {
        // JS source uses \xHH; JSON uses \u00HH
        let (json, js) = (for_json("\x01"), crate::for_javascript_source("\x01"));
        assert_eq!(json, "\\u0001");
        assert_eq!(js, "\\x01");
    }
}