fast_cjson/
lib.rs

1#![forbid(rust_2018_idioms)]
2#![deny(unsafe_code)]
3#![warn(clippy::all, clippy::pedantic)]
4
5use either::Either;
6use icu_normalizer::ComposingNormalizer;
7use memchr::memchr3;
8use serde::Serialize;
9use serde_json::{
10    Serializer,
11    ser::{CharEscape, CompactFormatter, Formatter},
12};
13use std::{
14    collections::BTreeMap,
15    io::{self, Write as _},
16    mem,
17};
18
/// Which half of an object entry is currently being buffered.
///
/// Each variant owns the byte buffer(s) for the entry that is being assembled.
#[derive(Debug)]
enum Collecting {
    /// The bytes of the entry's key are being accumulated.
    Key(Vec<u8>),
    /// The key is complete and the bytes of its value are being accumulated.
    Value {
        /// Finished key bytes, held until the value is complete.
        key: Vec<u8>,
        /// Value bytes accumulated so far.
        value: Vec<u8>,
    },
}
24
25impl Default for Collecting {
26    fn default() -> Self {
27        Self::Key(Vec::new())
28    }
29}
30
31#[derive(Debug, Default)]
32struct Object {
33    obj: BTreeMap<Vec<u8>, Vec<u8>>,
34    state: Collecting,
35}
36
37/// A [`Formatter`](serde_json::format::Formatter) that produces canonical JSON.
38#[derive(Debug, Default)]
39pub struct CanonicalFormatter {
40    object_stack: Vec<Object>,
41}
42
43impl CanonicalFormatter {
44    /// Create a new `CanonicalFormatter` object.
45    #[inline]
46    #[must_use]
47    pub fn new() -> Self {
48        Self::default()
49    }
50
51    /// Convenience method to return the appropriate writer given the current context.
52    ///
53    /// If we are currently writing an object (that is, if `!self.object_stack.is_empty()`), we
54    /// need to write the value to either the next key or next value depending on that state
55    /// machine. See the docstrings for `Object` for more detail.
56    ///
57    /// If we are not currently writing an object, pass through `writer`.
58    #[inline]
59    fn writer<'a, W>(&'a mut self, writer: &'a mut W) -> impl io::Write + 'a
60    where
61        W: io::Write + ?Sized,
62    {
63        self.object_stack.last_mut().map_or_else(
64            || Either::Right(writer),
65            |object| {
66                let container = match object.state {
67                    Collecting::Key(ref mut key) => key,
68                    Collecting::Value { ref mut value, .. } => value,
69                };
70
71                Either::Left(container)
72            },
73        )
74    }
75
76    /// Returns a mutable reference to the top of the object stack.
77    #[inline]
78    fn obj_mut(&mut self) -> io::Result<&mut Object> {
79        self.object_stack.last_mut().ok_or_else(|| {
80            io::Error::other(
81                "Serializer called an object method without calling begin_object first",
82            )
83        })
84    }
85}
86
/// Generates `Formatter` methods that delegate to `serde_json::CompactFormatter`, sending the
/// output to whichever sink `CanonicalFormatter::writer` selects.
///
/// Takes a `;`-separated list of entries. Each entry is either `name` (a method that receives
/// only the writer) or `name, Type` (a method that receives the writer plus one argument of
/// type `Type`).
macro_rules! wrapper {
    // One method without an extra argument.
    ($method:ident) => {
        #[inline]
        fn $method<W: io::Write + ?Sized>(&mut self, writer: &mut W) -> io::Result<()> {
            let mut sink = self.writer(writer);
            CompactFormatter.$method(&mut sink)
        }
    };

    // One method with a single extra argument of type `$arg_ty`.
    ($method:ident, $arg_ty:ty) => {
        #[inline]
        fn $method<W: io::Write + ?Sized>(
            &mut self,
            writer: &mut W,
            value: $arg_ty,
        ) -> io::Result<()> {
            let mut sink = self.writer(writer);
            CompactFormatter.$method(&mut sink, value)
        }
    };

    // A list of entries: expand each one through the single-method arms above. This arm has to
    // stay last, otherwise a lone entry would match it and recurse forever.
    ($( $method:ident $(, $arg_ty:ty)? );* $(;)?) => {
        $(
            wrapper!($method $(, $arg_ty)?);
        )*
    };
}
113
/// Expands to an `Err` carrying an [`io::ErrorKind::InvalidInput`] error that rejects floating
/// point numbers.
macro_rules! float_err {
    () => {{
        let kind = io::ErrorKind::InvalidInput;
        Err(io::Error::new(kind, "floating point numbers are not allowed"))
    }};
}
122
123impl Formatter for CanonicalFormatter {
124    wrapper! {
125        write_null;
126        write_bool, bool;
127    }
128
129    wrapper! {
130        write_i8, i8;
131        write_i16, i16;
132        write_i32, i32;
133        write_i64, i64;
134        write_i128, i128;
135    }
136
137    wrapper! {
138        write_u8, u8;
139        write_u16, u16;
140        write_u32, u32;
141        write_u64, u64;
142        write_u128, u128;
143    }
144
145    wrapper! {
146        write_byte_array, &[u8];
147    }
148
149    wrapper! {
150        begin_string;
151        end_string;
152    }
153
154    wrapper! {
155        begin_array;
156        end_array;
157        begin_array_value, bool;
158        end_array_value;
159    }
160
161    #[inline]
162    fn write_f32<W: io::Write + ?Sized>(&mut self, _writer: &mut W, _value: f32) -> io::Result<()> {
163        float_err!()
164    }
165
166    #[inline]
167    fn write_f64<W: io::Write + ?Sized>(&mut self, _writer: &mut W, _value: f64) -> io::Result<()> {
168        float_err!()
169    }
170
171    // If serde_json's `arbitrary_precision` feature is enabled, all numbers are internally stored as strings,
172    // and this method is always used (even for floating point values).
173    #[inline]
174    fn write_number_str<W: io::Write + ?Sized>(
175        &mut self,
176        writer: &mut W,
177        value: &str,
178    ) -> io::Result<()> {
179        if memchr3(b'.', b'e', b'E', value.as_bytes()).is_some() {
180            float_err!()
181        } else {
182            CompactFormatter.write_number_str(&mut self.writer(writer), value)
183        }
184    }
185
186    #[inline]
187    fn write_char_escape<W>(&mut self, writer: &mut W, char_escape: CharEscape) -> io::Result<()>
188    where
189        W: ?Sized + io::Write,
190    {
191        // CJSON wants us to escape backslashes and double quotes.
192        // And only backslashes and double quotes.
193        if matches!(char_escape, CharEscape::Quote | CharEscape::ReverseSolidus) {
194            self.writer(writer).write_all(b"\\")?;
195        }
196
197        let byte = match char_escape {
198            CharEscape::Quote => b'"',
199            CharEscape::ReverseSolidus => b'\\',
200            CharEscape::Solidus => b'/',
201            CharEscape::Backspace => b'\x08',
202            CharEscape::FormFeed => b'\x0c',
203            CharEscape::LineFeed => b'\n',
204            CharEscape::CarriageReturn => b'\r',
205            CharEscape::Tab => b'\t',
206            CharEscape::AsciiControl(byte) => byte,
207        };
208        self.writer(writer).write_all(&[byte])
209    }
210
211    #[inline]
212    fn write_raw_fragment<W>(&mut self, writer: &mut W, fragment: &str) -> io::Result<()>
213    where
214        W: ?Sized + io::Write,
215    {
216        let mut ser = Serializer::with_formatter(self.writer(writer), Self::new());
217        serde_json::from_str::<serde_json::Value>(fragment)?.serialize(&mut ser)?;
218
219        Ok(())
220    }
221
222    #[inline]
223    fn write_string_fragment<W>(&mut self, writer: &mut W, fragment: &str) -> io::Result<()>
224    where
225        W: ?Sized + io::Write,
226    {
227        let normalizer = const { ComposingNormalizer::new_nfc() };
228        for ch in normalizer.normalize_iter(fragment.chars()) {
229            self.writer(writer)
230                .write_all(ch.encode_utf8(&mut [0; 4]).as_bytes())?;
231        }
232
233        Ok(())
234    }
235
236    // Here are the object methods. Because keys must be sorted, we serialize the object's keys and
237    // values in memory as a `BTreeMap`, then write it all out when `end_object_value` is called.
238
239    #[inline]
240    fn begin_object<W: io::Write + ?Sized>(&mut self, writer: &mut W) -> io::Result<()> {
241        CompactFormatter.begin_object(&mut self.writer(writer))?;
242        self.object_stack.push(Object::default());
243        Ok(())
244    }
245
246    #[inline]
247    fn end_object<W: io::Write + ?Sized>(&mut self, writer: &mut W) -> io::Result<()> {
248        let object = self.object_stack.pop().ok_or_else(|| {
249            io::Error::other(
250                "serde_json called Formatter::end_object object method
251                 without calling begin_object first",
252            )
253        })?;
254
255        let mut first = true;
256        let mut writer = self.writer(writer);
257
258        for (key, value) in object.obj {
259            CompactFormatter.begin_object_key(&mut writer, first)?;
260            writer.write_all(&key)?;
261            CompactFormatter.end_object_key(&mut writer)?;
262
263            CompactFormatter.begin_object_value(&mut writer)?;
264            writer.write_all(&value)?;
265            CompactFormatter.end_object_value(&mut writer)?;
266
267            first = false;
268        }
269
270        CompactFormatter.end_object(&mut writer)
271    }
272
273    #[inline]
274    fn begin_object_key<W: io::Write + ?Sized>(
275        &mut self,
276        _writer: &mut W,
277        _first: bool,
278    ) -> io::Result<()> {
279        let object = self.obj_mut()?;
280        object.state = Collecting::Key(Vec::new());
281
282        Ok(())
283    }
284
285    #[inline]
286    fn end_object_key<W: io::Write + ?Sized>(&mut self, _writer: &mut W) -> io::Result<()> {
287        let object = self.obj_mut()?;
288
289        let Collecting::Key(key) = &mut object.state else {
290            unreachable!();
291        };
292
293        object.state = Collecting::Value {
294            key: mem::take(key),
295            value: Vec::new(),
296        };
297
298        Ok(())
299    }
300
301    #[inline]
302    fn begin_object_value<W: io::Write + ?Sized>(&mut self, _writer: &mut W) -> io::Result<()> {
303        Ok(())
304    }
305
306    #[inline]
307    fn end_object_value<W: io::Write + ?Sized>(&mut self, _writer: &mut W) -> io::Result<()> {
308        let object = self.obj_mut()?;
309        let Collecting::Value { key, value } = &mut object.state else {
310            unreachable!();
311        };
312
313        object.obj.insert(mem::take(key), mem::take(value));
314
315        Ok(())
316    }
317}