Skip to main content

bougie_php_json/
lib.rs

1//! PHP-compatible JSON encoder. Byte-exact output for two PHP
2//! `json_encode` flag combinations Composer relies on:
3//!
4//! - [`Mode::Hash`] — `json_encode($d, 0)`. The byte stream MD5'd into
5//!   `composer.lock`'s `content-hash` (see [`super::lockfile::content_hash`]).
6//!   Compact, forward slashes escaped to `\/`, every code point ≥ 0x80
7//!   escaped to `\uXXXX` lowercase, surrogate pairs for code points
8//!   > 0xFFFF. U+2028 / U+2029 fall under the same rule.
9//!
10//! - [`Mode::Pretty`] — `json_encode($d, JSON_PRETTY_PRINT |
11//!   JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE)`. The flag set
12//!   `Composer\Json\JsonFile::encode` defaults to when writing
13//!   composer.json / composer.lock. 4-space indent, raw `/`, raw UTF-8
14//!   for non-ASCII — except U+2028 / U+2029, which Composer keeps
15//!   escaped (it doesn't pass `JSON_UNESCAPED_LINE_TERMINATORS`).
16//!
17//! Shared rules (both modes):
18//!
19//! - `"` → `\"`, `\` → `\\`.
20//! - Named escapes for 0x08 → `\b`, 0x09 → `\t`, 0x0A → `\n`,
21//!   0x0C → `\f`, 0x0D → `\r`.
22//! - Other C0 control bytes (0x00..0x07, 0x0B, 0x0E..0x1F) →
23//!   `\u00XX` lowercase.
24//! - 0x7F (DEL) emitted raw — PHP's escape bitmap doesn't flag it.
25//! - Object keys take the same string-escape rules as string values.
//! - Numbers: integers via plain decimal; floats as shortest-round-trip
//!   digits, the digit string PHP's `zend_gcvt` produces at
//!   `serialize_precision = -1` (the post-7.1 default). Integer-valued
//!   floats render without a fractional part (`1.0` → `"1"`). Caveat:
//!   PHP switches to exponent notation (`1.0e+25`, `1.0e-5`) at extreme
//!   magnitudes, which Rust's plain `f64::to_string` never does.
31
32use serde_json::Value;
33
/// Which PHP `json_encode` flag combination the encoder reproduces.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Mode {
    /// Flags `0`: compact output with `/` and every non-ASCII code point
    /// escaped. This is the byte stream Composer hashes for `content-hash`.
    Hash,
    /// `JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE`:
    /// the encoding Composer uses when it writes JSON files.
    Pretty,
}
42
43/// Encode a `serde_json::Value` to bytes matching PHP's `json_encode`
44/// for the given flag combination.
45pub fn encode(value: &Value, mode: Mode) -> Vec<u8> {
46    let mut out = Vec::with_capacity(256);
47    write_value(&mut out, value, mode, 0);
48    out
49}
50
51fn write_value(out: &mut Vec<u8>, v: &Value, mode: Mode, depth: usize) {
52    match v {
53        Value::Null => out.extend_from_slice(b"null"),
54        Value::Bool(true) => out.extend_from_slice(b"true"),
55        Value::Bool(false) => out.extend_from_slice(b"false"),
56        Value::Number(n) => write_number(out, n),
57        Value::String(s) => write_string(out, s, mode),
58        Value::Array(a) => write_array(out, a, mode, depth),
59        Value::Object(o) => write_object(out, o, mode, depth),
60    }
61}
62
63fn write_string(out: &mut Vec<u8>, s: &str, mode: Mode) {
64    out.push(b'"');
65    for c in s.chars() {
66        write_char_escaped(out, c, mode);
67    }
68    out.push(b'"');
69}
70
71fn write_char_escaped(out: &mut Vec<u8>, c: char, mode: Mode) {
72    let cp = c as u32;
73    match c {
74        '"' => out.extend_from_slice(b"\\\""),
75        '\\' => out.extend_from_slice(b"\\\\"),
76        '/' => match mode {
77            Mode::Hash => out.extend_from_slice(b"\\/"),
78            Mode::Pretty => out.push(b'/'),
79        },
80        '\u{08}' => out.extend_from_slice(b"\\b"),
81        '\u{09}' => out.extend_from_slice(b"\\t"),
82        '\u{0a}' => out.extend_from_slice(b"\\n"),
83        '\u{0c}' => out.extend_from_slice(b"\\f"),
84        '\u{0d}' => out.extend_from_slice(b"\\r"),
85        _ if cp < 0x20 => write_unicode_escape(out, cp),
86        // The guard `cp < 0x80` makes this cast lossless.
87        _ if cp < 0x80 => out.push(u8::try_from(cp).expect("ascii by construction")),
88        _ => match mode {
89            Mode::Hash => write_unicode_escape_full(out, cp),
90            Mode::Pretty => match cp {
91                // PHP escapes U+2028 / U+2029 even with
92                // JSON_UNESCAPED_UNICODE, unless
93                // JSON_UNESCAPED_LINE_TERMINATORS is also set —
94                // Composer doesn't pass it.
95                0x2028 | 0x2029 => write_unicode_escape(out, cp),
96                _ => {
97                    let mut buf = [0u8; 4];
98                    out.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
99                }
100            },
101        },
102    }
103}
104
/// Append `\uXXXX` for `code`, using four lowercase hex digits.
///
/// Only the low 16 bits of `code` are rendered, so callers pass a BMP
/// code point or one half of a surrogate pair.
fn write_unicode_escape(out: &mut Vec<u8>, code: u32) {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    out.extend_from_slice(b"\\u");
    // Walk the four nibbles from most to least significant.
    out.extend(
        [12u32, 8, 4, 0]
            .iter()
            .map(|&shift| DIGITS[((code >> shift) & 0xf) as usize]),
    );
}
114
115/// Emit a code point as one `\uXXXX` (BMP) or a UTF-16 surrogate pair
116/// (`\uHHHH\uLLLL`) for code points > 0xFFFF. Matches PHP's encoder:
117/// `us -= 0x10000; high = (us >> 10) | 0xd800; low = (us & 0x3ff) | 0xdc00`.
118fn write_unicode_escape_full(out: &mut Vec<u8>, code: u32) {
119    if code <= 0xFFFF {
120        write_unicode_escape(out, code);
121    } else {
122        let adjusted = code - 0x10000;
123        let high = 0xd800 + (adjusted >> 10);
124        let low = 0xdc00 + (adjusted & 0x3ff);
125        write_unicode_escape(out, high);
126        write_unicode_escape(out, low);
127    }
128}
129
130fn write_number(out: &mut Vec<u8>, n: &serde_json::Number) {
131    if let Some(i) = n.as_i64() {
132        out.extend_from_slice(i.to_string().as_bytes());
133    } else if let Some(u) = n.as_u64() {
134        out.extend_from_slice(u.to_string().as_bytes());
135    } else if let Some(f) = n.as_f64() {
136        // PHP's zend_gcvt at serialize_precision=-1 yields the shortest
137        // round-trip form. Rust's f64 Display does the same.
138        out.extend_from_slice(f.to_string().as_bytes());
139    } else {
140        // serde_json::Number always carries one of the three above when
141        // constructed from valid JSON; falling through is unreachable
142        // in practice. Be defensive rather than panic.
143        out.extend_from_slice(b"0");
144    }
145}
146
147fn write_array(out: &mut Vec<u8>, a: &[Value], mode: Mode, depth: usize) {
148    if a.is_empty() {
149        out.extend_from_slice(b"[]");
150        return;
151    }
152    match mode {
153        Mode::Hash => {
154            out.push(b'[');
155            for (i, v) in a.iter().enumerate() {
156                if i > 0 {
157                    out.push(b',');
158                }
159                write_value(out, v, mode, depth + 1);
160            }
161            out.push(b']');
162        }
163        Mode::Pretty => {
164            out.push(b'[');
165            for (i, v) in a.iter().enumerate() {
166                if i > 0 {
167                    out.push(b',');
168                }
169                out.push(b'\n');
170                indent(out, depth + 1);
171                write_value(out, v, mode, depth + 1);
172            }
173            out.push(b'\n');
174            indent(out, depth);
175            out.push(b']');
176        }
177    }
178}
179
180fn write_object(out: &mut Vec<u8>, o: &serde_json::Map<String, Value>, mode: Mode, depth: usize) {
181    if o.is_empty() {
182        // Working from `serde_json::Value`, arrays and objects are
183        // distinct types, so an empty Map is unambiguously `{}`.
184        out.extend_from_slice(b"{}");
185        return;
186    }
187    match mode {
188        Mode::Hash => {
189            out.push(b'{');
190            for (i, (k, v)) in o.iter().enumerate() {
191                if i > 0 {
192                    out.push(b',');
193                }
194                write_string(out, k, mode);
195                out.push(b':');
196                write_value(out, v, mode, depth + 1);
197            }
198            out.push(b'}');
199        }
200        Mode::Pretty => {
201            out.push(b'{');
202            for (i, (k, v)) in o.iter().enumerate() {
203                if i > 0 {
204                    out.push(b',');
205                }
206                out.push(b'\n');
207                indent(out, depth + 1);
208                write_string(out, k, mode);
209                out.extend_from_slice(b": ");
210                write_value(out, v, mode, depth + 1);
211            }
212            out.push(b'\n');
213            indent(out, depth);
214            out.push(b'}');
215        }
216    }
217}
218
/// Append `depth` levels of indentation, four spaces per level.
fn indent(out: &mut Vec<u8>, depth: usize) {
    const LEVEL: &[u8; 4] = b"    ";
    out.extend(std::iter::repeat(LEVEL).take(depth).flatten());
}
224
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Both encoder modes, for assertions that must hold in each.
    const BOTH_MODES: [Mode; 2] = [Mode::Hash, Mode::Pretty];

    /// Encode `v` and view the bytes as text. Tests that care about raw
    /// bytes call `encode` directly instead.
    fn enc(v: &Value, mode: Mode) -> String {
        let bytes = encode(v, mode);
        String::from_utf8(bytes).expect("UTF-8 output")
    }

    // ---- string escape rules ------------------------------------------------

    #[test]
    fn hash_escapes_forward_slash() {
        // PHP: json_encode("a/b", 0) yields "a\/b" (with the quotes).
        let encoded = enc(&Value::from("a/b"), Mode::Hash);
        assert_eq!(encoded, r#""a\/b""#);
    }

    #[test]
    fn pretty_leaves_forward_slash_raw() {
        // The file-write flag set includes JSON_UNESCAPED_SLASHES.
        let encoded = enc(&Value::from("a/b"), Mode::Pretty);
        assert_eq!(encoded, r#""a/b""#);
    }

    #[test]
    fn both_modes_escape_double_quote_and_backslash() {
        for mode in BOTH_MODES.iter().copied() {
            assert_eq!(enc(&json!("\""), mode), r#""\"""#, "quote in {:?}", mode);
            assert_eq!(enc(&json!("\\"), mode), r#""\\""#, "backslash in {:?}", mode);
        }
    }

    #[test]
    fn named_c0_escapes() {
        // 0x08, 0x09, 0x0a, 0x0c and 0x0d have named escapes; any other
        // byte below 0x20 uses the generic \u00XX form.
        let cases = [
            ("\u{08}", r#""\b""#),
            ("\u{09}", r#""\t""#),
            ("\u{0a}", r#""\n""#),
            ("\u{0c}", r#""\f""#),
            ("\u{0d}", r#""\r""#),
            ("\u{01}", r#""\u0001""#),
            ("\u{0b}", r#""\u000b""#),
            ("\u{1f}", r#""\u001f""#),
        ];
        for (input, expected) in cases.iter().copied() {
            assert_eq!(enc(&Value::from(input), Mode::Hash), expected, "input {:?}", input);
        }
    }

    #[test]
    fn del_0x7f_is_raw() {
        // DEL is not in PHP's escape set, so it passes through unchanged.
        let bytes = encode(&json!("\u{7f}"), Mode::Hash);
        assert_eq!(bytes, b"\"\x7f\"".to_vec());
    }

    #[test]
    fn hash_escapes_non_ascii_bmp_lowercase_hex() {
        // U+00E9 (e with acute accent) becomes \u00e9, lowercase digits.
        assert_eq!(enc(&json!("\u{e9}"), Mode::Hash), r#""\u00e9""#);
        // U+201C (left double quotation mark).
        assert_eq!(enc(&json!("\u{201c}"), Mode::Hash), r#""\u201c""#);
    }

    #[test]
    fn hash_emits_surrogate_pair_for_supplementary_plane() {
        // U+1F4A9: minus 0x10000 is 0x0F4A9, giving the pair
        // high = 0xd83d, low = 0xdca9.
        let encoded = enc(&json!("\u{1f4a9}"), Mode::Hash);
        assert_eq!(encoded, r#""\ud83d\udca9""#);
    }

    #[test]
    fn pretty_emits_non_ascii_raw_utf8() {
        // With JSON_UNESCAPED_UNICODE, U+00E9 is written as its two UTF-8
        // bytes rather than as an escape...
        let bytes = encode(&json!("\u{e9}"), Mode::Pretty);
        assert_eq!(bytes, vec![b'"', 0xc3, 0xa9, b'"']);
    }

    #[test]
    fn pretty_still_escapes_line_terminators() {
        // ...but U+2028 / U+2029 stay escaped, because Composer's
        // JsonFile::encode does not pass JSON_UNESCAPED_LINE_TERMINATORS.
        let cases = [("\u{2028}", r#""\u2028""#), ("\u{2029}", r#""\u2029""#)];
        for (input, expected) in cases.iter().copied() {
            assert_eq!(enc(&Value::from(input), Mode::Pretty), expected);
        }
    }

    // ---- structural shapes --------------------------------------------------

    #[test]
    fn empty_collections() {
        for mode in BOTH_MODES.iter().copied() {
            assert_eq!(enc(&json!([]), mode), "[]", "empty array in {:?}", mode);
            assert_eq!(enc(&json!({}), mode), "{}", "empty object in {:?}", mode);
        }
    }

    #[test]
    fn hash_compact_no_whitespace() {
        let doc = json!({"name": "acme/widget", "require": {"php": "^8.3"}});
        let expected = r#"{"name":"acme\/widget","require":{"php":"^8.3"}}"#;
        assert_eq!(enc(&doc, Mode::Hash), expected);
    }

    #[test]
    fn hash_preserves_nested_key_order() {
        // With serde_json's preserve_order, maps keep insertion order.
        // The encoder must not re-sort nested keys: only the top level is
        // ksort'd, and that happens upstream in `content_hash`.
        let doc = json!({"require": {"php": "^8.3", "ext-redis": "*"}});
        let encoded = enc(&doc, Mode::Hash);
        let needle = r#""php":"^8.3","ext-redis":"*""#;
        assert!(encoded.contains(needle));
    }

    #[test]
    fn pretty_uses_four_space_indent() {
        // Composer's JsonFile defaults to a 4-space indent.
        let doc = json!({"a": 1, "b": [2, 3]});
        let expected = [
            "{",
            "    \"a\": 1,",
            "    \"b\": [",
            "        2,",
            "        3",
            "    ]",
            "}",
        ]
        .join("\n");
        assert_eq!(enc(&doc, Mode::Pretty), expected);
    }

    // ---- numbers ------------------------------------------------------------

    #[test]
    fn integers_plain_decimal() {
        let cases = [
            (json!(0), "0"),
            (json!(-7), "-7"),
            (json!(1_000_000_000_i64), "1000000000"),
        ];
        for (value, expected) in cases.iter() {
            assert_eq!(enc(value, Mode::Hash), *expected);
        }
    }

    #[test]
    fn floats_shortest_roundtrip_with_lowercase_e() {
        // 0.1 is printed as "0.1" by both PHP and Rust.
        assert_eq!(enc(&json!(0.1_f64), Mode::Hash), "0.1");
        // An integer-valued float drops its fractional part: PHP does so
        // unless JSON_PRESERVE_ZERO_FRACTION is set. Parsing `1.0` gives
        // serde_json a float, which must still be encoded as "1".
        let one: Value = serde_json::from_str("1.0").unwrap();
        assert_eq!(enc(&one, Mode::Hash), "1");
    }

    // ---- null / bool --------------------------------------------------------

    #[test]
    fn null_true_false() {
        let cases = [
            (Value::Null, "null"),
            (json!(true), "true"),
            (json!(false), "false"),
        ];
        for (value, expected) in cases.iter() {
            assert_eq!(enc(value, Mode::Hash), *expected);
        }
    }
}