canon_json/
lib.rs

1// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4#![doc = include_str!("../README.md")]
5#![forbid(unsafe_code)]
6#![deny(missing_docs)]
7
8mod floatformat;
9
10use std::collections::BTreeMap;
11use std::io::{Error, ErrorKind, Result, Write};
12
13use serde::Serialize;
14use serde_json::ser::{CharEscape, CompactFormatter, Formatter, Serializer};
15
/// A [`Formatter`] that produces canonical (RFC 8785) JSON.
///
/// Object members are buffered in memory and emitted sorted by key once the object is complete;
/// everything outside an object is forwarded to the writer handed to each formatter method.
///
/// See the [crate-level documentation](crate) for more detail.
#[derive(Debug, Default)]
pub struct CanonicalFormatter {
    // Objects currently being built, outermost first; the innermost (last) one receives writes.
    object_stack: Vec<Object>,
}
25
/// An object key stored as UTF-16 code units.
///
/// The derived `Ord` compares the `u16` sequences element by element, which gives the property
/// ordering described in
/// <https://www.rfc-editor.org/rfc/rfc8785#name-sorting-of-object-properties>.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
struct ObjectKey(Vec<u16>);
29
30impl ObjectKey {
31    fn new_from_str(s: &str) -> Self {
32        Self(s.encode_utf16().collect())
33    }
34
35    fn new_from_bytes(v: &[u8]) -> Result<Self> {
36        let s = std::str::from_utf8(v)
37            .map_err(|e| Error::new(ErrorKind::InvalidData, format!("Expected UTF-8 key: {e}")))?;
38        Ok(Self::new_from_str(s))
39    }
40
41    fn as_string(&self) -> Result<String> {
42        std::char::decode_utf16(self.0.iter().copied()).try_fold(String::new(), |mut acc, c| {
43            let c = c.map_err(|_| Error::new(ErrorKind::InvalidData, "Expected UTF-8 key"))?;
44            acc.push(c);
45            Ok(acc)
46        })
47    }
48
49    // Serialize this value as a JSON string
50    fn write_to<W: Write>(&self, w: W) -> Result<()> {
51        let s = self.as_string()?;
52        let val = serde_json::Value::String(s);
53        let mut s = Serializer::new(w);
54        val.serialize(&mut s).map_err(|e| {
55            if let Some(kind) = e.io_error_kind() {
56                Error::new(kind, "I/O error")
57            } else {
58                Error::new(ErrorKind::Other, e.to_string())
59            }
60        })
61    }
62}
63
/// Internal struct to keep track of an object in progress of being built.
///
/// As keys and values are received by `CanonicalFormatter`, they are written to `next_key` and
/// `next_value` by using the `CanonicalFormatter::writer` convenience method.
///
/// How this struct behaves when `Formatter` methods are called:
///
/// ```plain
/// [other methods]  // values written to the writer received by method
/// begin_object     // create this object
/// /-> begin_object_key    // object.key_done = false;
/// |   [other methods]     // values written to object.next_key, writer received by method ignored
/// |   end_object_key      // object.key_done = true;
/// |   begin_object_value  // [nothing]
/// |   [other methods]     // values written to object.next_value
/// |   end_object_value    // object.next_key and object.next_value are inserted into object.obj
/// \---- // jump back if more values are present
/// end_object       // write the object (sorted by its keys) to the writer received by the method
/// ```
#[derive(Debug, Default)]
struct Object {
    // Completed members: key -> already-serialized value bytes, kept sorted by the `BTreeMap`.
    obj: BTreeMap<ObjectKey, Vec<u8>>,
    // Bytes of the key currently being received (moved into `obj` by `end_object_value`).
    next_key: Vec<u8>,
    // Serialized bytes of the value currently being received.
    next_value: Vec<u8>,
    // `false` while a key is being received, `true` once `end_object_key` has been called.
    key_done: bool,
}
90
/// Destination for formatter output: either the caller's writer or an in-memory buffer.
///
/// Buffering lets object keys and values be captured first and emitted later, once the
/// properties of the enclosing object can be sorted.
enum WriterTarget<'w, W> {
    /// Forward bytes to the wrapped writer.
    Underlying(W),
    /// Append bytes to the borrowed buffer.
    Buffer(&'w mut Vec<u8>),
}

impl<W: Write> Write for WriterTarget<'_, W> {
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        match self {
            // `Vec<u8>`'s own `Write` impl appends the whole slice and reports its length.
            Self::Buffer(pending) => pending.write(buf),
            Self::Underlying(inner) => inner.write(buf),
        }
    }

    fn flush(&mut self) -> Result<()> {
        // Only a real writer has anything to flush; the buffer is always up to date.
        if let Self::Underlying(inner) = self {
            inner.flush()
        } else {
            Ok(())
        }
    }
}
118
119impl CanonicalFormatter {
120    /// Create a new `CanonicalFormatter` object.
121    pub fn new() -> Self {
122        Self::default()
123    }
124
125    /// Convenience method to return the appropriate writer given the current context.
126    ///
127    /// If we are currently writing an object (that is, if `!self.object_stack.is_empty()`), we
128    /// need to write the value to either the next key or next value depending on that state
129    /// machine. See the docstrings for `Object` for more detail.
130    ///
131    /// If we are not currently writing an object, pass through `writer`.
132    fn writer<'a, W: Write + ?Sized>(
133        &'a mut self,
134        writer: &'a mut W,
135    ) -> WriterTarget<'a, &'a mut W> {
136        self.writer_or_key(writer, false).0
137    }
138
139    /// For string writes, we may be writing into the key. If so, then handle
140    /// that specially.
141    fn writer_or_key<'a, W: Write + ?Sized>(
142        &'a mut self,
143        writer: &'a mut W,
144        object_key_allowed: bool,
145    ) -> (WriterTarget<'a, &'a mut W>, bool) {
146        self.object_stack
147            .last_mut()
148            .map_or((WriterTarget::Underlying(writer), false), |object| {
149                let r = if object.key_done {
150                    &mut object.next_value
151                } else if !object_key_allowed {
152                    panic!("Unhandled write into object key");
153                } else {
154                    &mut object.next_key
155                };
156                (WriterTarget::Buffer(r), !object.key_done)
157            })
158    }
159
160    /// Returns a mutable reference to the top of the object stack.
161    fn obj_mut(&mut self) -> Result<&mut Object> {
162        self.object_stack.last_mut().ok_or_else(|| {
163            Error::new(
164                ErrorKind::Other,
165                "serde_json called an object method without calling begin_object first",
166            )
167        })
168    }
169}
170
/// Wraps `serde_json::CompactFormatter` to use the appropriate writer (see
/// `CanonicalFormatter::writer`).
///
/// `wrapper!(name)` generates a `Formatter` method taking only the writer, and
/// `wrapper!(name, Type)` generates one that additionally forwards a single argument of `Type`.
macro_rules! wrapper {
    // Method with no argument besides the writer.
    ($f:ident) => {
        fn $f<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
            CompactFormatter.$f(&mut self.writer(writer))
        }
    };

    // Method with one extra argument that is passed through unchanged.
    ($f:ident, $t:ty) => {
        fn $f<W: Write + ?Sized>(&mut self, writer: &mut W, arg: $t) -> Result<()> {
            CompactFormatter.$f(&mut self.writer(writer), arg)
        }
    };
}
186
187impl Formatter for CanonicalFormatter {
188    wrapper!(write_null);
189    wrapper!(write_bool, bool);
190    wrapper!(write_i8, i8);
191    wrapper!(write_i16, i16);
192    wrapper!(write_i32, i32);
193    wrapper!(write_i64, i64);
194    wrapper!(write_i128, i128);
195    wrapper!(write_u8, u8);
196    wrapper!(write_u16, u16);
197    wrapper!(write_u32, u32);
198    wrapper!(write_u64, u64);
199    wrapper!(write_u128, u128);
200
201    fn write_f32<W: Write + ?Sized>(&mut self, writer: &mut W, value: f32) -> Result<()> {
202        self.write_f64(writer, value.into())
203    }
204
205    fn write_f64<W: Write + ?Sized>(&mut self, writer: &mut W, value: f64) -> Result<()> {
206        let v = floatformat::number_to_json(value).map_err(|e| {
207            Error::new(
208                ErrorKind::InvalidData,
209                format!("Unhandled floating point value {e}"),
210            )
211        })?;
212        CompactFormatter.write_string_fragment(&mut self.writer(writer), &v)
213    }
214
215    // By default this is only used for u128/i128. If serde_json's `arbitrary_precision` feature is
216    // enabled, all numbers are internally stored as strings, and this method is always used (even
217    // for floating point values).
218    fn write_number_str<W: Write + ?Sized>(&mut self, writer: &mut W, value: &str) -> Result<()> {
219        CompactFormatter.write_number_str(&mut self.writer(writer), value)
220    }
221
222    fn begin_string<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
223        let Some(v) = self.object_stack.last_mut() else {
224            return CompactFormatter.begin_string(writer);
225        };
226        if !v.key_done {
227            return Ok(());
228        }
229        CompactFormatter.begin_string(&mut v.next_value)
230    }
231
232    fn end_string<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
233        let Some(v) = self.object_stack.last_mut() else {
234            return CompactFormatter.end_string(writer);
235        };
236        if !v.key_done {
237            return Ok(());
238        }
239        CompactFormatter.end_string(&mut v.next_value)
240    }
241
242    fn write_string_fragment<W: Write + ?Sized>(
243        &mut self,
244        writer: &mut W,
245        fragment: &str,
246    ) -> Result<()> {
247        let (mut writer, in_key) = self.writer_or_key(writer, true);
248        if in_key {
249            writer.write_all(fragment.as_bytes())
250        } else {
251            CompactFormatter.write_string_fragment(&mut writer, fragment)
252        }
253    }
254
255    fn write_char_escape<W: Write + ?Sized>(
256        &mut self,
257        writer: &mut W,
258        char_escape: CharEscape,
259    ) -> Result<()> {
260        let (mut writer, in_key) = self.writer_or_key(writer, true);
261        if in_key {
262            let v = match char_escape {
263                CharEscape::Quote => b"\"",
264                CharEscape::ReverseSolidus => b"\\",
265                CharEscape::Solidus => b"/",
266                CharEscape::Backspace => b"\x08",
267                CharEscape::FormFeed => b"\x0C",
268                CharEscape::LineFeed => b"\n",
269                CharEscape::CarriageReturn => b"\r",
270                CharEscape::Tab => b"\t",
271                CharEscape::AsciiControl(c) => &[c],
272            };
273            writer.write_all(v)
274        } else {
275            CompactFormatter.write_char_escape(&mut writer, char_escape)
276        }
277    }
278
279    wrapper!(begin_array);
280    wrapper!(end_array);
281    wrapper!(begin_array_value, bool); // hack: this passes through the `first` argument
282    wrapper!(end_array_value);
283
284    // Here are the object methods. Because keys must be sorted, we serialize the object's keys and
285    // values in memory as a `BTreeMap`, then write it all out when `end_object_value` is called.
286
287    fn begin_object<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
288        CompactFormatter.begin_object(&mut self.writer(writer))?;
289        self.object_stack.push(Object::default());
290        Ok(())
291    }
292
293    fn end_object<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
294        let object = self.object_stack.pop().ok_or_else(|| {
295            Error::new(
296                ErrorKind::Other,
297                "serde_json called Formatter::end_object object method
298                 without calling begin_object first",
299            )
300        })?;
301        let mut writer = self.writer(writer);
302        let mut first = true;
303
304        for (key, value) in object.obj {
305            CompactFormatter.begin_object_key(&mut writer, first)?;
306            key.write_to(&mut writer)?;
307            CompactFormatter.end_object_key(&mut writer)?;
308
309            CompactFormatter.begin_object_value(&mut writer)?;
310            writer.write_all(&value)?;
311            CompactFormatter.end_object_value(&mut writer)?;
312
313            first = false;
314        }
315
316        CompactFormatter.end_object(&mut writer)
317    }
318
319    fn begin_object_key<W: Write + ?Sized>(&mut self, _writer: &mut W, _first: bool) -> Result<()> {
320        let object = self.obj_mut()?;
321        object.key_done = false;
322        Ok(())
323    }
324
325    fn end_object_key<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
326        let object = self.obj_mut()?;
327        object.key_done = true;
328        Ok(())
329    }
330
331    fn begin_object_value<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
332        Ok(())
333    }
334
335    fn end_object_value<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
336        let object = self.obj_mut()?;
337        let key = std::mem::take(&mut object.next_key);
338        let value = std::mem::take(&mut object.next_value);
339        // Canonialize as UTF-16
340        object.obj.insert(ObjectKey::new_from_bytes(&key)?, value);
341        Ok(())
342    }
343
344    // This is for serde_json's `raw_value` feature, which provides a RawValue type that is passed
345    // through as-is. That's not good enough for canonical JSON, so we parse it and immediately
346    // write it back out... as canonical JSON.
347    fn write_raw_fragment<W: Write + ?Sized>(
348        &mut self,
349        writer: &mut W,
350        fragment: &str,
351    ) -> Result<()> {
352        let mut ser = Serializer::with_formatter(self.writer(writer), Self::new());
353        serde_json::from_str::<serde_json::Value>(fragment)?.serialize(&mut ser)?;
354        Ok(())
355    }
356}
357
/// A helper trait to write canonical JSON.
///
/// Every method returns a [`std::io::Result`]; an `Err` means the value could not be serialized
/// or the output could not be written.
pub trait CanonJsonSerialize {
    /// Serialize the given data structure as JSON into the I/O stream.
    fn to_canon_json_writer<W>(&self, writer: W) -> Result<()>
    where
        W: Write;
    /// Serialize the given data structure as a JSON byte vector.
    fn to_canon_json_vec(&self) -> Result<Vec<u8>>;
    /// Serialize the given data structure as a String.
    fn to_canon_json_string(&self) -> Result<String>;
}
369
370impl<S> CanonJsonSerialize for S
371where
372    S: Serialize,
373{
374    fn to_canon_json_writer<W>(&self, writer: W) -> Result<()>
375    where
376        W: Write,
377    {
378        let mut ser = Serializer::with_formatter(writer, CanonicalFormatter::new());
379        Ok(self.serialize(&mut ser)?)
380    }
381
382    fn to_canon_json_vec(&self) -> Result<Vec<u8>> {
383        let mut buf = Vec::new();
384        self.to_canon_json_writer(&mut buf)?;
385        Ok(buf)
386    }
387
388    fn to_canon_json_string(&self) -> Result<String> {
389        String::from_utf8(self.to_canon_json_vec()?)
390            .map_err(|err| Error::new(ErrorKind::InvalidData, err))
391    }
392}
393
394#[cfg(test)]
395mod tests {
396    use super::*;
397
398    use std::{cmp::Ordering, io::Result};
399
400    use proptest::prelude::*;
401    use serde_json::Number;
402    use sha2::{Digest, Sha256};
403    use similar_asserts::assert_eq;
404
405    #[test]
406    fn test_object_key() {
407        let cases = [("\n", "1"), ("\r", "<script>"), ("ö", "דּ")];
408        for case in cases {
409            assert_eq!(case.0.cmp(case.1), Ordering::Less);
410        }
411        let mut v = cases
412            .iter()
413            .flat_map(|v| [v.0, v.1])
414            .collect::<std::collections::BTreeSet<_>>()
415            .into_iter();
416        assert_eq!(v.next().unwrap(), "\n");
417        assert_eq!(v.next().unwrap(), "\r");
418        assert_eq!(v.next().unwrap(), "1");
419        assert_eq!(v.next().unwrap(), "<script>");
420        assert_eq!(v.next().unwrap(), "ö");
421        assert_eq!(v.next().unwrap(), "דּ");
422
423        let mut buf = Vec::new();
424        ObjectKey::new_from_str("").write_to(&mut buf).unwrap();
425        assert_eq!(&buf, b"\"\"");
426    }
427
    /// Small wrapper around the `serde_json` json! macro to encode the value as canonical JSON.
    ///
    /// Expands to a `Result<Vec<u8>>` holding the canonical encoding of the `json!` value.
    macro_rules! encode {
        ($($tt:tt)+) => {
            // The immediately-invoked closure pins the argument type to `serde_json::Value`.
            (|v: serde_json::Value| -> Result<Vec<u8>> {
                v.to_canon_json_vec()
            })(serde_json::json!($($tt)+))
        };
    }
436
    /// These smoke tests come from securesystemslib, the library used by the TUF reference
    /// implementation.
    ///
    /// <https://github.com/secure-systems-lab/securesystemslib/blob/f466266014aff529510216b8c2f8c8f39de279ec/tests/test_formats.py#L354-L389>
    #[test]
    fn securesystemslib_asserts() -> Result<()> {
        // Arrays, empty containers and small objects whose keys are already in sorted order.
        assert_eq!(encode!([1, 2, 3])?, b"[1,2,3]");
        assert_eq!(encode!([1, 2, 3])?, b"[1,2,3]");
        assert_eq!(encode!([])?, b"[]");
        assert_eq!(encode!({})?, b"{}");
        assert_eq!(encode!({"A": [99]})?, br#"{"A":[99]}"#);
        assert_eq!(encode!({"A": true})?, br#"{"A":true}"#);
        assert_eq!(encode!({"B": false})?, br#"{"B":false}"#);
        assert_eq!(encode!({"x": 3, "y": 2})?, br#"{"x":3,"y":2}"#);
        assert_eq!(encode!({"x": 3, "y": null})?, br#"{"x":3,"y":null}"#);

        Ok(())
    }
455
    /// A more involved test than any of the above for our core competency: ordering things.
    ///
    /// The input lists keys out of order at several nesting levels; the expected output has every
    /// object's keys sorted.
    #[test]
    fn ordered_nested_object() -> Result<()> {
        assert_eq!(
            encode!({
                "nested": {
                    "bad": true,
                    "good": false
                },
                "b": 2,
                "a": 1,
                "c": {
                    "h": {
                        "h": -5,
                        "i": 3
                    },
                    "a": null,
                    "x": {}
                }
            })?,
            br#"{"a":1,"b":2,"c":{"a":null,"h":{"h":-5,"i":3},"x":{}},"nested":{"bad":true,"good":false}}"#.to_vec(),
        );

        Ok(())
    }
481
    /// This test asserts that the canonical representation of some real-world data always comes
    /// out the same.
    ///
    /// The expected value is the exact byte sequence of the canonical encoding.
    // The fixture contains long integer literals (e.g. `1604605512`) that are kept verbatim.
    #[allow(clippy::unreadable_literal)]
    #[test]
    fn actual_tuf_signed() {
        let encode_result = encode!(
        {
          "signed": {
            "_type": "timestamp",
            "spec_version": "1.0.0",
            "version": 1604605512,
            "expires": "2020-11-12T19:45:12.613154979Z",
            "meta": {
              "snapshot.json": {
                "length": 1278,
                "hashes": {
                  "sha256": "56c4ecc3b331f6154d9a5005f6e2978e4198cc8c3b79746c25a592043a2d83d4"
                },
                "version": 1604605512
              }
            }
          }
        }
        );

        let encoded = encode_result.unwrap();
        // Canonical output as raw bytes; it begins with `{"signed":{"_type":"timestamp",`.
        let expected: Vec<u8> = vec![
            123, 34, 115, 105, 103, 110, 101, 100, 34, 58, 123, 34, 95, 116, 121, 112, 101, 34, 58,
            34, 116, 105, 109, 101, 115, 116, 97, 109, 112, 34, 44, 34, 101, 120, 112, 105, 114,
            101, 115, 34, 58, 34, 50, 48, 50, 48, 45, 49, 49, 45, 49, 50, 84, 49, 57, 58, 52, 53,
            58, 49, 50, 46, 54, 49, 51, 49, 53, 52, 57, 55, 57, 90, 34, 44, 34, 109, 101, 116, 97,
            34, 58, 123, 34, 115, 110, 97, 112, 115, 104, 111, 116, 46, 106, 115, 111, 110, 34, 58,
            123, 34, 104, 97, 115, 104, 101, 115, 34, 58, 123, 34, 115, 104, 97, 50, 53, 54, 34,
            58, 34, 53, 54, 99, 52, 101, 99, 99, 51, 98, 51, 51, 49, 102, 54, 49, 53, 52, 100, 57,
            97, 53, 48, 48, 53, 102, 54, 101, 50, 57, 55, 56, 101, 52, 49, 57, 56, 99, 99, 56, 99,
            51, 98, 55, 57, 55, 52, 54, 99, 50, 53, 97, 53, 57, 50, 48, 52, 51, 97, 50, 100, 56,
            51, 100, 52, 34, 125, 44, 34, 108, 101, 110, 103, 116, 104, 34, 58, 49, 50, 55, 56, 44,
            34, 118, 101, 114, 115, 105, 111, 110, 34, 58, 49, 54, 48, 52, 54, 48, 53, 53, 49, 50,
            125, 125, 44, 34, 115, 112, 101, 99, 95, 118, 101, 114, 115, 105, 111, 110, 34, 58, 34,
            49, 46, 48, 46, 48, 34, 44, 34, 118, 101, 114, 115, 105, 111, 110, 34, 58, 49, 54, 48,
            52, 54, 48, 53, 53, 49, 50, 125, 125,
        ];
        assert_eq!(expected, encoded);
    }
526
    /// Checks that 128-bit integers at the extremes of their range are written in full and that
    /// the struct's fields come out sorted (`i128` before `u128`).
    #[test]
    fn encode_u128_i128() {
        #[derive(serde_derive::Serialize)]
        struct Object {
            u128: u128,
            i128: i128,
        }

        let value = Object {
            u128: u128::MAX,
            i128: i128::MIN,
        };

        // Raw bytes of the expected output; it begins with `{"i128":-1701411834604692317`.
        let expected = [
            123, 34, 105, 49, 50, 56, 34, 58, 45, 49, 55, 48, 49, 52, 49, 49, 56, 51, 52, 54, 48,
            52, 54, 57, 50, 51, 49, 55, 51, 49, 54, 56, 55, 51, 48, 51, 55, 49, 53, 56, 56, 52, 49,
            48, 53, 55, 50, 56, 44, 34, 117, 49, 50, 56, 34, 58, 51, 52, 48, 50, 56, 50, 51, 54,
            54, 57, 50, 48, 57, 51, 56, 52, 54, 51, 52, 54, 51, 51, 55, 52, 54, 48, 55, 52, 51, 49,
            55, 54, 56, 50, 49, 49, 52, 53, 53, 125,
        ];

        assert_eq!(value.to_canon_json_vec().unwrap(), expected);
    }
550
    /// For a single-key object with a string value, the canonical output must equal what
    /// `serde_json::to_string` produces.
    #[test]
    fn test_basic() {
        let v = serde_json::json! { { "foo": "42" } };
        let expected = serde_json::to_string(&v).unwrap();
        let buf = String::from_utf8(encode!(v).unwrap()).unwrap();
        assert_eq!(&buf, &expected);
    }
558
559    /// As it says, generate arbitrary JSON. This is based on
560    /// https://proptest-rs.github.io/proptest/proptest/tutorial/recursive.html
561    ///
562    /// We support controlling the regex for keys, and whether or not floating point values are emitted.
563    fn arbitrary_json(
564        keyspace: &'static str,
565        allow_fp: bool,
566    ) -> impl Strategy<Value = serde_json::Value> {
567        use serde_json::Value;
568        let leaf = prop_oneof![
569            Just(Value::Null),
570            any::<f64>().prop_filter_map("valid f64 for JSON", move |v| {
571                let n = if allow_fp && v.fract() != 0.0 {
572                    Number::from_f64(v).unwrap()
573                } else {
574                    // Constrain to values clearly lower than
575                    // the https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER
576                    Number::from_u128(v as u32 as u128).unwrap()
577                };
578                Some(Value::Number(n))
579            }),
580            any::<bool>().prop_map(Value::Bool),
581            keyspace.prop_map(Value::String),
582        ];
583        leaf.prop_recursive(
584            8,   // 8 levels deep
585            256, // Shoot for maximum size of 256 nodes
586            10,  // We put up to 10 items per collection
587            move |inner| {
588                prop_oneof![
589                    // Take the inner strategy and make the two recursive cases.
590                    prop::collection::vec(inner.clone(), 0..10).prop_map(Value::Array),
591                    prop::collection::hash_map(keyspace, inner, 0..10)
592                        .prop_map(|v| { v.into_iter().collect() }),
593                ]
594            },
595        )
596    }
597
    proptest! {
        // Canonical output for arbitrary JSON (any keys, floats allowed) must parse back to a
        // value equal to the input.
        #[test]
        fn roundtrip_rfc8785(v in arbitrary_json(".*", true)) {
            let buf = encode!(&v).unwrap();
            // On a parse failure, include both the input value and the produced text in the panic.
            let v2: serde_json::Value = serde_json::from_slice(&buf)
                .map_err(|e| format!("Failed to parse {v:?} -> {}: {e}", String::from_utf8_lossy(&buf))).unwrap();
            assert_eq!(&v, &v2);
        }
    }
607
608    fn verify(input: &str, expected: &str) {
609        let input: serde_json::Value = serde_json::from_str(input).unwrap();
610        assert_eq!(expected, input.to_canon_json_string().unwrap());
611    }
612
    /// Canonicalizes the `arrays.json` fixture and compares it with the stored expected output.
    #[test]
    fn test_arrays() {
        verify(
            include_str!("../testdata/input/arrays.json"),
            include_str!("../testdata/output/arrays.json"),
        );
    }
620
    /// Canonicalizes the `french.json` fixture and compares it with the stored expected output.
    #[test]
    fn test_french() {
        verify(
            include_str!("../testdata/input/french.json"),
            include_str!("../testdata/output/french.json"),
        );
    }
628
    /// Canonicalizes the `structures.json` fixture and compares it with the stored expected output.
    #[test]
    fn test_structures() {
        verify(
            include_str!("../testdata/input/structures.json"),
            include_str!("../testdata/output/structures.json"),
        );
    }
636
    /// Canonicalizes the `unicode.json` fixture and compares it with the stored expected output.
    #[test]
    fn test_unicode() {
        verify(
            include_str!("../testdata/input/unicode.json"),
            include_str!("../testdata/output/unicode.json"),
        );
    }
644
    /// Canonicalizes the `values.json` fixture and compares it with the stored expected output.
    #[test]
    fn test_values() {
        verify(
            include_str!("../testdata/input/values.json"),
            include_str!("../testdata/output/values.json"),
        );
    }
652
    /// Canonicalizes the `weird.json` fixture and compares it with the stored expected output.
    #[test]
    fn test_weird() {
        verify(
            include_str!("../testdata/input/weird.json"),
            include_str!("../testdata/output/weird.json"),
        );
    }
660
    /// Encodes every file in `testdata-cjson-orig` and checks two things: the SHA-256 of the
    /// output (plus a trailing newline) equals the file's name, and the output parses back to the
    /// original value.
    #[test]
    fn test_from_testdata() -> Result<()> {
        use cap_std;

        let amb = cap_std::ambient_authority();
        let root =
            cap_std::fs::Dir::open_ambient_dir(std::env::var("CARGO_MANIFEST_DIR").unwrap(), amb)?;
        let dir = root.open_dir("testdata-cjson-orig")?;
        for entry in dir.entries()? {
            let entry = entry?;
            let filename = entry.file_name();
            let filename = filename.to_str().unwrap();
            // Skip the entries that are not test vectors.
            match filename {
                "errors" => continue,
                "LICENSE" => continue,
                _ => {}
            }

            let json: serde_json::Value = serde_json::from_reader(entry.open()?)?;
            let enc = encode!(json)?;
            let mut sha256 = Sha256::new();
            sha256.update(&enc);

            // testdata sha256sum are computed with a trailing \n
            sha256.update("\n");
            // The file name without its `.json` suffix is the expected hex digest.
            let filename = filename.trim_end_matches(".json");
            let hash = format!("{:x}", sha256.finalize());
            assert_eq!(filename, hash);
            let json2: serde_json::Value = serde_json::from_slice(&enc)?;

            assert_eq!(json, json2)
        }

        Ok(())
    }
696
    // Regex that restricts generated strings to ASCII letters and digits.
    // Everything else is excluded because we know that e.g. olpc-cjson bombs on control
    // characters, and also because it does NFC ordering that will cause non-equivalency
    // for some whitespace etc.
    const ASCII_ALPHANUMERIC: &str = r"[a-zA-Z0-9]*";
702
    proptest! {
        // Verify strict equivalency with olpc-cjson for ASCII alphanumeric keys and no
        // floating point values.
        #[test]
        fn crosscheck_olpc_cjson_ascii(v in arbitrary_json(ASCII_ALPHANUMERIC, false)) {
            let canon_json = String::from_utf8(encode!(&v).unwrap()).unwrap();
            // Serialize the same value with olpc-cjson's formatter for comparison.
            let mut olpc_cjson_serialized = Vec::new();
            let mut ser = serde_json::Serializer::with_formatter(&mut olpc_cjson_serialized, olpc_cjson::CanonicalFormatter::new());
            v.serialize(&mut ser).unwrap();
            assert_eq!(canon_json, String::from_utf8(olpc_cjson_serialized).unwrap());
        }
    }
714
    proptest! {
        // Verify strict equivalency with the cjson crate for ASCII alphanumeric keys and no
        // floating point values.
        #[test]
        fn crosscheck_cjson_ascii(v in arbitrary_json(ASCII_ALPHANUMERIC, false)) {
            let canon_json = String::from_utf8(encode!(&v).unwrap()).unwrap();
            let cjson = String::from_utf8(cjson::to_vec(&v).unwrap()).unwrap();
            assert_eq!(canon_json, cjson);
        }

        // Verify equivalency (after sorting) with non-ASCII keys
        #[test]
        fn crosscheck_cjson(v in arbitrary_json(".*", false)) {
            // The raw bytes are not compared here; both outputs are reparsed and checked
            // against the original value instead.
            let buf = encode!(&v).unwrap();
            let self_reparsed = serde_json::from_slice::<serde_json::Value>(&buf).unwrap();
            let buf = cjson::to_vec(&v).unwrap();
            let cjson_reparsed = serde_json::from_slice::<serde_json::Value>(&buf).unwrap();
            // As above with olpc-cjson, this relies on the fact that serde_json
            // sorts object keys by default.
            assert_eq!(self_reparsed, v);
            assert_eq!(cjson_reparsed, v);
        }
    }
737}