Skip to main content

datavalue_rs/
emit.rs

1//! Native JSON emitter for [`DataValue`] and [`OwnedDataValue`].
2//!
3//! Bypasses the `serde_json::to_string` path. The serde route pays trait
4//! dispatch per node and a per-byte string-escape loop; emitting directly
5//! into a buffer with `ryu` / `itoa` and a SWAR-driven escape scan lands
6//! closer to the bespoke emitters in `json-rust` / `simd_json`.
7//!
//! Two sinks sit behind the [`JsonSink`] trait below — `Vec<u8>` for
//! [`DataValue::write_json_into`] and `fmt::Formatter` for the [`fmt::Display`]
//! impls — and both the compact writers and the indenting writers behind
//! [`DataValue::pretty`] emit through it.
11//!
12//! The `Serialize` impl in [`crate::ser`] is still the right entry point
13//! when feeding non-JSON serde sinks (msgpack, flexbuffers, etc.).
14
15use core::fmt;
16
17use crate::number::NumberValue;
18use crate::owned::OwnedDataValue;
19use crate::value::DataValue;
20
/// `0x01` replicated into each of the eight byte lanes of a `u64`.
const SWAR_ONES: u64 = 0x0101_0101_0101_0101;
/// `0x80` replicated into each of the eight byte lanes of a `u64`.
const SWAR_HIGHS: u64 = 0x8080_8080_8080_8080;

/// Word-at-a-time probe for bytes that must be escaped inside a JSON
/// string: `"`, `\\`, and control bytes (< 0x20).
///
/// Returns a word whose set bits sit in the top bit of flagged lanes; zero
/// means none of the eight bytes needs escaping. Only the *lowest* set bit
/// is guaranteed exact: the subtraction borrow can spill into the lane right
/// above a real hit (e.g. `#` directly after `"`), so callers should locate
/// the first hit via `trailing_zeros` and rescan from there.
#[inline(always)]
fn escape_mask(w: u64) -> u64 {
    // "Has a zero byte" test: a lane's top bit survives when the lane is 0.
    let zero_lanes = |x: u64| x.wrapping_sub(SWAR_ONES) & !x & SWAR_HIGHS;

    // XOR with a broadcast byte turns "equals that byte" into "is zero".
    let quote_lanes = zero_lanes(w ^ (SWAR_ONES * u64::from(b'"')));
    let backslash_lanes = zero_lanes(w ^ (SWAR_ONES * u64::from(b'\\')));
    // Keeping only the top three bits zeroes exactly the lanes below 0x20.
    let control_lanes = zero_lanes(w & 0xE0E0_E0E0_E0E0_E0E0);

    quote_lanes | backslash_lanes | control_lanes
}
36
/// Output target shared by the JSON writers; implemented for `Vec<u8>` and
/// for a `fmt::Formatter` adapter.
///
/// Contract: everything pushed through a sink is valid UTF-8 — numbers and
/// escape sequences are ASCII, and string payloads are forwarded from `&str`
/// sources — which is what makes the string-based formatter adapter sound.
pub(crate) trait JsonSink {
    /// Error reported by the underlying destination.
    type Error;

    /// Appends a run of bytes to the sink.
    fn write_bytes(&mut self, bytes: &[u8]) -> Result<(), Self::Error>;

    /// Appends a single byte to the sink.
    fn write_byte(&mut self, byte: u8) -> Result<(), Self::Error>;
}
45
46impl JsonSink for Vec<u8> {
47    type Error = core::convert::Infallible;
48    #[inline]
49    fn write_bytes(&mut self, b: &[u8]) -> Result<(), Self::Error> {
50        self.extend_from_slice(b);
51        Ok(())
52    }
53    #[inline]
54    fn write_byte(&mut self, b: u8) -> Result<(), Self::Error> {
55        self.push(b);
56        Ok(())
57    }
58}
59
60struct FormatterSink<'a, 'b>(&'a mut fmt::Formatter<'b>);
61
62impl<'a, 'b> JsonSink for FormatterSink<'a, 'b> {
63    type Error = fmt::Error;
64    #[inline]
65    fn write_bytes(&mut self, b: &[u8]) -> Result<(), Self::Error> {
66        // SAFETY: every caller writes either ASCII bytes (escapes, numbers,
67        // structural punctuation) or pre-validated `&str` payloads.
68        let s = unsafe { core::str::from_utf8_unchecked(b) };
69        self.0.write_str(s)
70    }
71    #[inline]
72    fn write_byte(&mut self, b: u8) -> Result<(), Self::Error> {
73        debug_assert!(b.is_ascii());
74        let buf = [b];
75        let s = unsafe { core::str::from_utf8_unchecked(&buf) };
76        self.0.write_str(s)
77    }
78}
79
80#[inline]
81fn write_escaped_str<S: JsonSink>(out: &mut S, s: &str) -> Result<(), S::Error> {
82    out.write_byte(b'"')?;
83    let bytes = s.as_bytes();
84    let mut i = 0;
85    let mut run_start = 0;
86
87    while i + 8 <= bytes.len() {
88        let w = u64::from_le_bytes(bytes[i..i + 8].try_into().unwrap());
89        let mask = escape_mask(w);
90        if mask == 0 {
91            i += 8;
92            continue;
93        }
94        let off = (mask.trailing_zeros() / 8) as usize;
95        let hit = i + off;
96        if hit > run_start {
97            out.write_bytes(&bytes[run_start..hit])?;
98        }
99        write_escape_byte(out, bytes[hit])?;
100        i = hit + 1;
101        run_start = i;
102    }
103    // Tail: per-byte for the final < 8 bytes.
104    while i < bytes.len() {
105        let b = bytes[i];
106        if matches!(b, b'"' | b'\\') || b < 0x20 {
107            if i > run_start {
108                out.write_bytes(&bytes[run_start..i])?;
109            }
110            write_escape_byte(out, b)?;
111            run_start = i + 1;
112        }
113        i += 1;
114    }
115    if run_start < bytes.len() {
116        out.write_bytes(&bytes[run_start..])?;
117    }
118    out.write_byte(b'"')
119}
120
121#[inline]
122fn write_escape_byte<S: JsonSink>(out: &mut S, b: u8) -> Result<(), S::Error> {
123    match b {
124        b'"' => out.write_bytes(b"\\\""),
125        b'\\' => out.write_bytes(b"\\\\"),
126        b'\n' => out.write_bytes(b"\\n"),
127        b'\r' => out.write_bytes(b"\\r"),
128        b'\t' => out.write_bytes(b"\\t"),
129        0x08 => out.write_bytes(b"\\b"),
130        0x0C => out.write_bytes(b"\\f"),
131        c => {
132            // Other control bytes (< 0x20 not named above) use \u00XX. The
133            // high byte is always 0 here.
134            const HEX: &[u8; 16] = b"0123456789abcdef";
135            out.write_bytes(b"\\u00")?;
136            out.write_byte(HEX[((c >> 4) & 0x0F) as usize])?;
137            out.write_byte(HEX[(c & 0x0F) as usize])
138        }
139    }
140}
141
142#[inline]
143fn write_number<S: JsonSink>(out: &mut S, n: NumberValue) -> Result<(), S::Error> {
144    match n {
145        NumberValue::Integer(i) => {
146            let mut buf = itoa::Buffer::new();
147            out.write_bytes(buf.format(i).as_bytes())
148        }
149        NumberValue::Float(f) => {
150            if !f.is_finite() {
151                // serde_json emits non-finite floats as `null` to keep
152                // output valid JSON. Match that.
153                return out.write_bytes(b"null");
154            }
155            let mut buf = ryu::Buffer::new();
156            out.write_bytes(buf.format_finite(f).as_bytes())
157        }
158    }
159}
160
161// ---- Compact emit (no whitespace) ------------------------------------------------
162
163fn write_data_value<S: JsonSink>(out: &mut S, v: &DataValue<'_>) -> Result<(), S::Error> {
164    match *v {
165        DataValue::Null => out.write_bytes(b"null"),
166        DataValue::Bool(true) => out.write_bytes(b"true"),
167        DataValue::Bool(false) => out.write_bytes(b"false"),
168        DataValue::Number(n) => write_number(out, n),
169        DataValue::String(s) => write_escaped_str(out, s),
170        DataValue::Array(items) => {
171            out.write_byte(b'[')?;
172            let mut first = true;
173            for item in items {
174                if !first {
175                    out.write_byte(b',')?;
176                }
177                first = false;
178                write_data_value(out, item)?;
179            }
180            out.write_byte(b']')
181        }
182        DataValue::Object(pairs) => {
183            out.write_byte(b'{')?;
184            let mut first = true;
185            for (k, v) in pairs {
186                if !first {
187                    out.write_byte(b',')?;
188                }
189                first = false;
190                write_escaped_str(out, k)?;
191                out.write_byte(b':')?;
192                write_data_value(out, v)?;
193            }
194            out.write_byte(b'}')
195        }
196        #[cfg(feature = "datetime")]
197        DataValue::DateTime(d) => write_escaped_str(out, &d.to_iso_string()),
198        #[cfg(feature = "datetime")]
199        DataValue::Duration(d) => write_escaped_str(out, &d.to_string()),
200    }
201}
202
203fn write_owned_value<S: JsonSink>(out: &mut S, v: &OwnedDataValue) -> Result<(), S::Error> {
204    match v {
205        OwnedDataValue::Null => out.write_bytes(b"null"),
206        OwnedDataValue::Bool(true) => out.write_bytes(b"true"),
207        OwnedDataValue::Bool(false) => out.write_bytes(b"false"),
208        OwnedDataValue::Number(n) => write_number(out, *n),
209        OwnedDataValue::String(s) => write_escaped_str(out, s),
210        OwnedDataValue::Array(items) => {
211            out.write_byte(b'[')?;
212            let mut first = true;
213            for item in items {
214                if !first {
215                    out.write_byte(b',')?;
216                }
217                first = false;
218                write_owned_value(out, item)?;
219            }
220            out.write_byte(b']')
221        }
222        OwnedDataValue::Object(pairs) => {
223            out.write_byte(b'{')?;
224            let mut first = true;
225            for (k, v) in pairs {
226                if !first {
227                    out.write_byte(b',')?;
228                }
229                first = false;
230                write_escaped_str(out, k)?;
231                out.write_byte(b':')?;
232                write_owned_value(out, v)?;
233            }
234            out.write_byte(b'}')
235        }
236        #[cfg(feature = "datetime")]
237        OwnedDataValue::DateTime(d) => write_escaped_str(out, &d.to_iso_string()),
238        #[cfg(feature = "datetime")]
239        OwnedDataValue::Duration(d) => write_escaped_str(out, &d.to_string()),
240    }
241}
242
243// ---- Pretty emit (two-space indent, matches serde_json::to_string_pretty) -------
244
245#[inline]
246fn write_indent<S: JsonSink>(out: &mut S, depth: usize) -> Result<(), S::Error> {
247    // Two spaces per level. Keep a reasonably long literal so most depths
248    // need a single write.
249    const SPACES: &[u8; 64] = b"                                                                ";
250    let mut remaining = depth * 2;
251    while remaining > 0 {
252        let chunk = remaining.min(SPACES.len());
253        out.write_bytes(&SPACES[..chunk])?;
254        remaining -= chunk;
255    }
256    Ok(())
257}
258
259fn write_data_value_pretty<S: JsonSink>(
260    out: &mut S,
261    v: &DataValue<'_>,
262    depth: usize,
263) -> Result<(), S::Error> {
264    match *v {
265        DataValue::Null => out.write_bytes(b"null"),
266        DataValue::Bool(true) => out.write_bytes(b"true"),
267        DataValue::Bool(false) => out.write_bytes(b"false"),
268        DataValue::Number(n) => write_number(out, n),
269        DataValue::String(s) => write_escaped_str(out, s),
270        DataValue::Array(items) => {
271            if items.is_empty() {
272                return out.write_bytes(b"[]");
273            }
274            out.write_byte(b'[')?;
275            for (i, item) in items.iter().enumerate() {
276                if i > 0 {
277                    out.write_byte(b',')?;
278                }
279                out.write_byte(b'\n')?;
280                write_indent(out, depth + 1)?;
281                write_data_value_pretty(out, item, depth + 1)?;
282            }
283            out.write_byte(b'\n')?;
284            write_indent(out, depth)?;
285            out.write_byte(b']')
286        }
287        DataValue::Object(pairs) => {
288            if pairs.is_empty() {
289                return out.write_bytes(b"{}");
290            }
291            out.write_byte(b'{')?;
292            for (i, (k, v)) in pairs.iter().enumerate() {
293                if i > 0 {
294                    out.write_byte(b',')?;
295                }
296                out.write_byte(b'\n')?;
297                write_indent(out, depth + 1)?;
298                write_escaped_str(out, k)?;
299                out.write_bytes(b": ")?;
300                write_data_value_pretty(out, v, depth + 1)?;
301            }
302            out.write_byte(b'\n')?;
303            write_indent(out, depth)?;
304            out.write_byte(b'}')
305        }
306        #[cfg(feature = "datetime")]
307        DataValue::DateTime(d) => write_escaped_str(out, &d.to_iso_string()),
308        #[cfg(feature = "datetime")]
309        DataValue::Duration(d) => write_escaped_str(out, &d.to_string()),
310    }
311}
312
313fn write_owned_value_pretty<S: JsonSink>(
314    out: &mut S,
315    v: &OwnedDataValue,
316    depth: usize,
317) -> Result<(), S::Error> {
318    match v {
319        OwnedDataValue::Null => out.write_bytes(b"null"),
320        OwnedDataValue::Bool(true) => out.write_bytes(b"true"),
321        OwnedDataValue::Bool(false) => out.write_bytes(b"false"),
322        OwnedDataValue::Number(n) => write_number(out, *n),
323        OwnedDataValue::String(s) => write_escaped_str(out, s),
324        OwnedDataValue::Array(items) => {
325            if items.is_empty() {
326                return out.write_bytes(b"[]");
327            }
328            out.write_byte(b'[')?;
329            for (i, item) in items.iter().enumerate() {
330                if i > 0 {
331                    out.write_byte(b',')?;
332                }
333                out.write_byte(b'\n')?;
334                write_indent(out, depth + 1)?;
335                write_owned_value_pretty(out, item, depth + 1)?;
336            }
337            out.write_byte(b'\n')?;
338            write_indent(out, depth)?;
339            out.write_byte(b']')
340        }
341        OwnedDataValue::Object(pairs) => {
342            if pairs.is_empty() {
343                return out.write_bytes(b"{}");
344            }
345            out.write_byte(b'{')?;
346            for (i, (k, v)) in pairs.iter().enumerate() {
347                if i > 0 {
348                    out.write_byte(b',')?;
349                }
350                out.write_byte(b'\n')?;
351                write_indent(out, depth + 1)?;
352                write_escaped_str(out, k)?;
353                out.write_bytes(b": ")?;
354                write_owned_value_pretty(out, v, depth + 1)?;
355            }
356            out.write_byte(b'\n')?;
357            write_indent(out, depth)?;
358            out.write_byte(b'}')
359        }
360        #[cfg(feature = "datetime")]
361        OwnedDataValue::DateTime(d) => write_escaped_str(out, &d.to_iso_string()),
362        #[cfg(feature = "datetime")]
363        OwnedDataValue::Duration(d) => write_escaped_str(out, &d.to_string()),
364    }
365}
366
367// ---- Public API on DataValue ---------------------------------------------------
368
369impl DataValue<'_> {
370    /// Append the compact JSON encoding of this value to `out`. Useful when
371    /// you want to amortize allocation across many values into a shared buffer.
372    /// For one-shot string conversion, use the [`fmt::Display`] impl
373    /// (`v.to_string()` / `format!("{v}")` / `println!("{v}")`).
374    pub fn write_json_into(&self, out: &mut Vec<u8>) {
375        let _ = write_data_value(out, self);
376    }
377
378    /// Pretty-print wrapper. `format!("{}", v.pretty())` produces the same
379    /// two-space-indented layout as `serde_json::to_string_pretty`.
380    ///
381    /// ```
382    /// use bumpalo::Bump;
383    /// use datavalue_rs::DataValue;
384    ///
385    /// let arena = Bump::new();
386    /// let v = DataValue::from_str(r#"{"a":1}"#, &arena).unwrap();
387    /// assert_eq!(v.pretty().to_string(), "{\n  \"a\": 1\n}");
388    /// ```
389    pub fn pretty(&self) -> Pretty<'_, DataValue<'_>> {
390        Pretty(self)
391    }
392
393    /// Append the pretty JSON encoding of this value to `out`.
394    pub fn write_json_pretty_into(&self, out: &mut Vec<u8>) {
395        let _ = write_data_value_pretty(out, self, 0);
396    }
397}
398
399impl OwnedDataValue {
400    /// Append the compact JSON encoding of this value to `out`. See
401    /// [`DataValue::write_json_into`]; this is the owned-side mirror.
402    pub fn write_json_into(&self, out: &mut Vec<u8>) {
403        let _ = write_owned_value(out, self);
404    }
405
406    /// Pretty-print wrapper; see [`DataValue::pretty`].
407    ///
408    /// ```
409    /// use datavalue_rs::OwnedDataValue;
410    ///
411    /// let v: OwnedDataValue = r#"{"a":1}"#.parse().unwrap();
412    /// assert_eq!(v.pretty().to_string(), "{\n  \"a\": 1\n}");
413    /// ```
414    pub fn pretty(&self) -> Pretty<'_, OwnedDataValue> {
415        Pretty(self)
416    }
417
418    /// Append the pretty JSON encoding of this value to `out`.
419    pub fn write_json_pretty_into(&self, out: &mut Vec<u8>) {
420        let _ = write_owned_value_pretty(out, self, 0);
421    }
422}
423
424// ---- Display + Pretty wrapper ---------------------------------------------------
425
/// Display adapter returned by [`DataValue::pretty`] and
/// [`OwnedDataValue::pretty`]; formatting it with `{}` renders the borrowed
/// value as indented JSON.
///
/// `Debug` is derived so the wrapper can be used in `{:?}` output and inside
/// `#[derive(Debug)]` types; it defers to the wrapped value's own `Debug`.
#[derive(Debug)]
pub struct Pretty<'b, T: ?Sized>(&'b T);
429
430impl fmt::Display for DataValue<'_> {
431    /// Compact JSON. Same shape as `serde_json::to_string`.
432    ///
433    /// ```
434    /// use bumpalo::Bump;
435    /// use datavalue_rs::DataValue;
436    ///
437    /// let arena = Bump::new();
438    /// let v = DataValue::from_str(r#"{"a":[1,2.5,"hi"]}"#, &arena).unwrap();
439    /// assert_eq!(v.to_string(), r#"{"a":[1,2.5,"hi"]}"#);
440    /// ```
441    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
442        write_data_value(&mut FormatterSink(f), self)
443    }
444}
445
446impl fmt::Display for OwnedDataValue {
447    /// Compact JSON. Same shape as `serde_json::to_string`.
448    ///
449    /// ```
450    /// use datavalue_rs::OwnedDataValue;
451    ///
452    /// let v: OwnedDataValue = r#"{"a":[1,2.5,"hi"]}"#.parse().unwrap();
453    /// assert_eq!(v.to_string(), r#"{"a":[1,2.5,"hi"]}"#);
454    /// ```
455    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
456        write_owned_value(&mut FormatterSink(f), self)
457    }
458}
459
460impl fmt::Display for Pretty<'_, DataValue<'_>> {
461    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
462        write_data_value_pretty(&mut FormatterSink(f), self.0, 0)
463    }
464}
465
466impl fmt::Display for Pretty<'_, OwnedDataValue> {
467    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
468        write_owned_value_pretty(&mut FormatterSink(f), self.0, 0)
469    }
470}
471
#[cfg(test)]
mod tests {
    use super::*;
    use bumpalo::Bump;

    /// Parses `s` into an arena-backed value and re-emits it as compact JSON.
    fn round_trip(s: &str) -> String {
        let arena = Bump::new();
        let v = DataValue::from_str(s, &arena).unwrap();
        v.to_string()
    }

    #[test]
    fn primitives() {
        assert_eq!(round_trip("null"), "null");
        assert_eq!(round_trip("true"), "true");
        assert_eq!(round_trip("false"), "false");
        assert_eq!(round_trip("42"), "42");
        assert_eq!(round_trip("-7"), "-7");
        assert_eq!(round_trip("3.5"), "3.5");
    }

    #[test]
    fn strings_with_escapes() {
        assert_eq!(round_trip(r#""hello""#), r#""hello""#);
        assert_eq!(round_trip(r#""a\nb""#), r#""a\nb""#);
        assert_eq!(round_trip(r#""a\\b""#), r#""a\\b""#);
        assert_eq!(round_trip(r#""a\"b""#), r#""a\"b""#);
        // Unicode passes through verbatim (we don't re-escape non-ASCII).
        assert_eq!(round_trip(r#""café""#), r#""café""#);
    }

    #[test]
    fn escapes_inside_swar_windows() {
        // Strings of 8+ bytes take the word-at-a-time scan. Cover a hit at
        // the start, middle, and end of a window, and `"#`, whose second byte
        // produces a borrow false-positive in the quote lane.
        for input in [
            r##""\"#abcdefgh""##,
            r##""abcdefg\\hijklmnop""##,
            r##""abcdefgh\nijklmnopq\t""##,
            r##""\u0001abcdefghijklmnop\u001f""##,
        ] {
            assert_eq!(round_trip(input), input);
        }
    }

    #[test]
    fn control_bytes_render_as_unicode_escapes() {
        let arena = Bump::new();
        let v = DataValue::from_str("\"\\u0001\"", &arena).unwrap();
        assert_eq!(v.to_string(), "\"\\u0001\"");
    }

    #[test]
    fn nested_round_trip_matches_serde_json() {
        let input = r#"{"a":[1,2,{"b":"hi\n","c":null,"d":true}],"e":-3.5,"f":[],"g":{}}"#;
        let arena = Bump::new();
        let v = DataValue::from_str(input, &arena).unwrap();
        let ours = v.to_string();
        let serde: serde_json::Value = serde_json::from_str(input).unwrap();
        let theirs = serde_json::to_string(&serde).unwrap();
        assert_eq!(ours, theirs);
    }

    #[test]
    fn long_string_swar_path() {
        let arena = Bump::new();
        let s = format!("\"{}\"", "x".repeat(200));
        let v = DataValue::from_str(&s, &arena).unwrap();
        assert_eq!(v.to_string(), s);
    }

    #[test]
    fn non_finite_floats_render_as_null() {
        let v = DataValue::from_f64(f64::NAN);
        assert_eq!(v.to_string(), "null");
        let v = DataValue::from_f64(f64::INFINITY);
        assert_eq!(v.to_string(), "null");
    }

    #[test]
    fn owned_round_trip() {
        let v: OwnedDataValue = r#"{"name":"alice","age":30}"#.parse().unwrap();
        let serde: serde_json::Value = serde_json::from_str(&v.to_string()).unwrap();
        assert_eq!(serde["name"], "alice");
        assert_eq!(serde["age"], 30);
    }

    #[test]
    fn write_json_into_buffer() {
        let arena = Bump::new();
        let v = DataValue::from_str(r#"[1,2,3]"#, &arena).unwrap();
        let mut buf = Vec::new();
        v.write_json_into(&mut buf);
        assert_eq!(buf, b"[1,2,3]");
    }

    #[test]
    fn write_json_pretty_into_matches_display() {
        // The buffer entry point and the `Pretty` wrapper share one writer,
        // so their output must agree byte for byte.
        let arena = Bump::new();
        let v = DataValue::from_str(r#"{"a":[1,{"b":null}]}"#, &arena).unwrap();
        let mut buf = Vec::new();
        v.write_json_pretty_into(&mut buf);
        assert_eq!(buf, v.pretty().to_string().into_bytes());
    }

    #[test]
    fn pretty_matches_serde_json_pretty() {
        let input = r#"{"a":[1,2,{"b":"hi","c":null}],"e":-3.5,"f":[],"g":{}}"#;
        let arena = Bump::new();
        let v = DataValue::from_str(input, &arena).unwrap();
        let ours = v.pretty().to_string();
        let serde: serde_json::Value = serde_json::from_str(input).unwrap();
        let theirs = serde_json::to_string_pretty(&serde).unwrap();
        assert_eq!(ours, theirs);
    }

    #[test]
    fn pretty_owned_matches_serde_json_pretty() {
        let input = r#"{"a":[1,2,{"b":"hi","c":null}],"e":-3.5,"f":[],"g":{}}"#;
        let v: OwnedDataValue = input.parse().unwrap();
        let serde: serde_json::Value = serde_json::from_str(input).unwrap();
        assert_eq!(
            v.pretty().to_string(),
            serde_json::to_string_pretty(&serde).unwrap()
        );
    }

    #[test]
    fn pretty_empty_collections_inline() {
        let arena = Bump::new();
        let v = DataValue::from_str(r#"{"a":[],"b":{}}"#, &arena).unwrap();
        assert_eq!(v.pretty().to_string(), "{\n  \"a\": [],\n  \"b\": {}\n}");
    }

    #[test]
    fn pretty_deep_indent_beyond_64_spaces() {
        // 35 levels deep -> 70 spaces of indent on the leaf line. Exercises
        // the chunked SPACES write loop.
        let arena = Bump::new();
        let mut s = String::new();
        for _ in 0..35 {
            s.push('[');
        }
        s.push('1');
        for _ in 0..35 {
            s.push(']');
        }
        let v = DataValue::from_str(&s, &arena).unwrap();
        let ours = v.pretty().to_string();
        let serde: serde_json::Value = serde_json::from_str(&s).unwrap();
        assert_eq!(ours, serde_json::to_string_pretty(&serde).unwrap());
    }
}