1#![doc = include_str!("../README.md")]
5#![forbid(unsafe_code)]
6#![deny(missing_docs)]
7
8mod floatformat;
9
10use std::collections::BTreeMap;
11use std::io::{Error, ErrorKind, Result, Write};
12
13use serde::Serialize;
14use serde_json::ser::{CharEscape, CompactFormatter, Formatter, Serializer};
15
/// A `serde_json` [`Formatter`] that buffers the members of every object it is
/// asked to write and emits them sorted by key when the object is closed.
///
/// Everything outside of objects is delegated to [`CompactFormatter`].
#[derive(Debug, Default)]
pub struct CanonicalFormatter {
    /// One entry per object that is currently open; the innermost object is last.
    object_stack: Vec<Object>,
}
25
/// Key of a JSON object member, stored as its UTF-16 code units.
///
/// The derived `Ord` compares the inner `Vec<u16>` lexicographically, so keys are
/// ordered by UTF-16 code unit rather than by UTF-8 byte or Unicode scalar value.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
struct ObjectKey(Vec<u16>);
29
30impl ObjectKey {
31 fn new_from_str(s: &str) -> Self {
32 Self(s.encode_utf16().collect())
33 }
34
35 fn new_from_bytes(v: &[u8]) -> Result<Self> {
36 let s = std::str::from_utf8(v)
37 .map_err(|e| Error::new(ErrorKind::InvalidData, format!("Expected UTF-8 key: {e}")))?;
38 Ok(Self::new_from_str(s))
39 }
40
41 fn as_string(&self) -> Result<String> {
42 std::char::decode_utf16(self.0.iter().copied()).try_fold(String::new(), |mut acc, c| {
43 let c = c.map_err(|_| Error::new(ErrorKind::InvalidData, "Expected UTF-8 key"))?;
44 acc.push(c);
45 Ok(acc)
46 })
47 }
48
49 fn write_to<W: Write>(&self, w: W) -> Result<()> {
51 let s = self.as_string()?;
52 let val = serde_json::Value::String(s);
53 let mut s = Serializer::new(w);
54 val.serialize(&mut s).map_err(|e| {
55 if let Some(kind) = e.io_error_kind() {
56 Error::new(kind, "I/O error")
57 } else {
58 Error::new(ErrorKind::Other, e.to_string())
59 }
60 })
61 }
62}
63
/// Serialization state of one JSON object that is currently open.
///
/// Members are collected here instead of being written right away; the
/// `BTreeMap` keeps them ordered by [`ObjectKey`] until the object is closed.
#[derive(Debug, Default)]
struct Object {
    /// Finished members: key mapped to the already serialized bytes of its value.
    obj: BTreeMap<ObjectKey, Vec<u8>>,
    /// Bytes of the key that is currently being written.
    next_key: Vec<u8>,
    /// Serialized bytes of the value that is currently being written.
    next_value: Vec<u8>,
    /// `false` while a key is being written (also the initial state), `true`
    /// once `end_object_key` has been called for the current member.
    key_done: bool,
}
90
/// Destination for serialized bytes: either a wrapped writer or a borrowed
/// in-memory buffer.
enum WriterTarget<'w, W> {
    /// Forward everything to the wrapped writer.
    Underlying(W),
    /// Append everything to the borrowed byte buffer.
    Buffer(&'w mut Vec<u8>),
}

impl<W: Write> Write for WriterTarget<'_, W> {
    /// Writes `buf` to whichever destination this target wraps.
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        match self {
            // `Vec<u8>`'s own `Write` impl appends the whole slice and reports its length.
            Self::Buffer(bytes) => bytes.write(buf),
            Self::Underlying(inner) => inner.write(buf),
        }
    }

    /// Flushes the wrapped writer; an in-memory buffer has nothing to flush.
    fn flush(&mut self) -> Result<()> {
        if let Self::Underlying(inner) = self {
            inner.flush()
        } else {
            Ok(())
        }
    }
}
118
119impl CanonicalFormatter {
120 pub fn new() -> Self {
122 Self::default()
123 }
124
125 fn writer<'a, W: Write + ?Sized>(
133 &'a mut self,
134 writer: &'a mut W,
135 ) -> WriterTarget<'a, &'a mut W> {
136 self.writer_or_key(writer, false).0
137 }
138
139 fn writer_or_key<'a, W: Write + ?Sized>(
142 &'a mut self,
143 writer: &'a mut W,
144 object_key_allowed: bool,
145 ) -> (WriterTarget<'a, &'a mut W>, bool) {
146 self.object_stack
147 .last_mut()
148 .map_or((WriterTarget::Underlying(writer), false), |object| {
149 let r = if object.key_done {
150 &mut object.next_value
151 } else if !object_key_allowed {
152 panic!("Unhandled write into object key");
153 } else {
154 &mut object.next_key
155 };
156 (WriterTarget::Buffer(r), !object.key_done)
157 })
158 }
159
160 fn obj_mut(&mut self) -> Result<&mut Object> {
162 self.object_stack.last_mut().ok_or_else(|| {
163 Error::new(
164 ErrorKind::Other,
165 "serde_json called an object method without calling begin_object first",
166 )
167 })
168 }
169}
170
/// Generates a `Formatter` method that delegates to the method of the same name
/// on [`CompactFormatter`], with the output redirected through `self.writer(..)`.
macro_rules! wrapper {
    // Method that takes only the writer.
    ($name:ident) => {
        fn $name<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
            let mut target = self.writer(writer);
            CompactFormatter.$name(&mut target)
        }
    };

    // Method that takes the writer plus one value of type `$value_ty`.
    ($name:ident, $value_ty:ty) => {
        fn $name<W: Write + ?Sized>(&mut self, writer: &mut W, value: $value_ty) -> Result<()> {
            let mut target = self.writer(writer);
            CompactFormatter.$name(&mut target, value)
        }
    };
}
186
187impl Formatter for CanonicalFormatter {
188 wrapper!(write_null);
189 wrapper!(write_bool, bool);
190 wrapper!(write_i8, i8);
191 wrapper!(write_i16, i16);
192 wrapper!(write_i32, i32);
193 wrapper!(write_i64, i64);
194 wrapper!(write_i128, i128);
195 wrapper!(write_u8, u8);
196 wrapper!(write_u16, u16);
197 wrapper!(write_u32, u32);
198 wrapper!(write_u64, u64);
199 wrapper!(write_u128, u128);
200
201 fn write_f32<W: Write + ?Sized>(&mut self, writer: &mut W, value: f32) -> Result<()> {
202 self.write_f64(writer, value.into())
203 }
204
205 fn write_f64<W: Write + ?Sized>(&mut self, writer: &mut W, value: f64) -> Result<()> {
206 let v = floatformat::number_to_json(value).map_err(|e| {
207 Error::new(
208 ErrorKind::InvalidData,
209 format!("Unhandled floating point value {e}"),
210 )
211 })?;
212 CompactFormatter.write_string_fragment(&mut self.writer(writer), &v)
213 }
214
215 fn write_number_str<W: Write + ?Sized>(&mut self, writer: &mut W, value: &str) -> Result<()> {
219 CompactFormatter.write_number_str(&mut self.writer(writer), value)
220 }
221
222 fn begin_string<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
223 let Some(v) = self.object_stack.last_mut() else {
224 return CompactFormatter.begin_string(writer);
225 };
226 if !v.key_done {
227 return Ok(());
228 }
229 CompactFormatter.begin_string(&mut v.next_value)
230 }
231
232 fn end_string<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
233 let Some(v) = self.object_stack.last_mut() else {
234 return CompactFormatter.end_string(writer);
235 };
236 if !v.key_done {
237 return Ok(());
238 }
239 CompactFormatter.end_string(&mut v.next_value)
240 }
241
242 fn write_string_fragment<W: Write + ?Sized>(
243 &mut self,
244 writer: &mut W,
245 fragment: &str,
246 ) -> Result<()> {
247 let (mut writer, in_key) = self.writer_or_key(writer, true);
248 if in_key {
249 writer.write_all(fragment.as_bytes())
250 } else {
251 CompactFormatter.write_string_fragment(&mut writer, fragment)
252 }
253 }
254
255 fn write_char_escape<W: Write + ?Sized>(
256 &mut self,
257 writer: &mut W,
258 char_escape: CharEscape,
259 ) -> Result<()> {
260 let (mut writer, in_key) = self.writer_or_key(writer, true);
261 if in_key {
262 let v = match char_escape {
263 CharEscape::Quote => b"\"",
264 CharEscape::ReverseSolidus => b"\\",
265 CharEscape::Solidus => b"/",
266 CharEscape::Backspace => b"\x08",
267 CharEscape::FormFeed => b"\x0C",
268 CharEscape::LineFeed => b"\n",
269 CharEscape::CarriageReturn => b"\r",
270 CharEscape::Tab => b"\t",
271 CharEscape::AsciiControl(c) => &[c],
272 };
273 writer.write_all(v)
274 } else {
275 CompactFormatter.write_char_escape(&mut writer, char_escape)
276 }
277 }
278
279 wrapper!(begin_array);
280 wrapper!(end_array);
281 wrapper!(begin_array_value, bool); wrapper!(end_array_value);
283
284 fn begin_object<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
288 CompactFormatter.begin_object(&mut self.writer(writer))?;
289 self.object_stack.push(Object::default());
290 Ok(())
291 }
292
293 fn end_object<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
294 let object = self.object_stack.pop().ok_or_else(|| {
295 Error::new(
296 ErrorKind::Other,
297 "serde_json called Formatter::end_object object method
298 without calling begin_object first",
299 )
300 })?;
301 let mut writer = self.writer(writer);
302 let mut first = true;
303
304 for (key, value) in object.obj {
305 CompactFormatter.begin_object_key(&mut writer, first)?;
306 key.write_to(&mut writer)?;
307 CompactFormatter.end_object_key(&mut writer)?;
308
309 CompactFormatter.begin_object_value(&mut writer)?;
310 writer.write_all(&value)?;
311 CompactFormatter.end_object_value(&mut writer)?;
312
313 first = false;
314 }
315
316 CompactFormatter.end_object(&mut writer)
317 }
318
319 fn begin_object_key<W: Write + ?Sized>(&mut self, _writer: &mut W, _first: bool) -> Result<()> {
320 let object = self.obj_mut()?;
321 object.key_done = false;
322 Ok(())
323 }
324
325 fn end_object_key<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
326 let object = self.obj_mut()?;
327 object.key_done = true;
328 Ok(())
329 }
330
331 fn begin_object_value<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
332 Ok(())
333 }
334
335 fn end_object_value<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
336 let object = self.obj_mut()?;
337 let key = std::mem::take(&mut object.next_key);
338 let value = std::mem::take(&mut object.next_value);
339 object.obj.insert(ObjectKey::new_from_bytes(&key)?, value);
341 Ok(())
342 }
343
344 fn write_raw_fragment<W: Write + ?Sized>(
348 &mut self,
349 writer: &mut W,
350 fragment: &str,
351 ) -> Result<()> {
352 let mut ser = Serializer::with_formatter(self.writer(writer), Self::new());
353 serde_json::from_str::<serde_json::Value>(fragment)?.serialize(&mut ser)?;
354 Ok(())
355 }
356}
357
/// Convenience methods for serializing a value as canonical JSON.
///
/// A blanket implementation based on [`CanonicalFormatter`] is provided for
/// every type that implements [`Serialize`].
pub trait CanonJsonSerialize {
    /// Serializes `self` into `writer`.
    ///
    /// # Errors
    ///
    /// Returns an error when serialization or writing fails.
    fn to_canon_json_writer<W>(&self, writer: W) -> Result<()>
    where
        W: Write;
    /// Serializes `self` into a newly allocated byte vector.
    ///
    /// # Errors
    ///
    /// Returns an error when serialization fails.
    fn to_canon_json_vec(&self) -> Result<Vec<u8>>;
    /// Serializes `self` into a newly allocated `String`.
    ///
    /// # Errors
    ///
    /// Returns an error when serialization fails or the output is not valid UTF-8.
    fn to_canon_json_string(&self) -> Result<String>;
}
369
370impl<S> CanonJsonSerialize for S
371where
372 S: Serialize,
373{
374 fn to_canon_json_writer<W>(&self, writer: W) -> Result<()>
375 where
376 W: Write,
377 {
378 let mut ser = Serializer::with_formatter(writer, CanonicalFormatter::new());
379 Ok(self.serialize(&mut ser)?)
380 }
381
382 fn to_canon_json_vec(&self) -> Result<Vec<u8>> {
383 let mut buf = Vec::new();
384 self.to_canon_json_writer(&mut buf)?;
385 Ok(buf)
386 }
387
388 fn to_canon_json_string(&self) -> Result<String> {
389 String::from_utf8(self.to_canon_json_vec()?)
390 .map_err(|err| Error::new(ErrorKind::InvalidData, err))
391 }
392}
393
#[cfg(test)]
mod tests {
    use super::*;

    use std::{cmp::Ordering, io::Result};

    use proptest::prelude::*;
    use serde_json::Number;
    use sha2::{Digest, Sha256};
    use similar_asserts::assert_eq;

    /// Checks key ordering and key serialization.
    #[test]
    fn test_object_key() {
        let cases = [("\n", "1"), ("\r", "<script>"), ("ö", "דּ")];
        for case in cases {
            assert_eq!(case.0.cmp(case.1), Ordering::Less);
            // The same pairs must also be ordered that way by `ObjectKey` itself.
            assert_eq!(
                ObjectKey::new_from_str(case.0).cmp(&ObjectKey::new_from_str(case.1)),
                Ordering::Less
            );
        }
        // A character outside the BMP is encoded as a surrogate pair (0xD83D 0xDE00),
        // which sorts *before* U+FB33 by UTF-16 code unit although its scalar value
        // is larger.
        assert_eq!("\u{1f600}".cmp("\u{fb33}"), Ordering::Greater);
        assert_eq!(
            ObjectKey::new_from_str("\u{1f600}").cmp(&ObjectKey::new_from_str("\u{fb33}")),
            Ordering::Less
        );

        let mut v = cases
            .iter()
            .flat_map(|v| [v.0, v.1])
            .collect::<std::collections::BTreeSet<_>>()
            .into_iter();
        assert_eq!(v.next().unwrap(), "\n");
        assert_eq!(v.next().unwrap(), "\r");
        assert_eq!(v.next().unwrap(), "1");
        assert_eq!(v.next().unwrap(), "<script>");
        assert_eq!(v.next().unwrap(), "ö");
        assert_eq!(v.next().unwrap(), "דּ");

        let mut buf = Vec::new();
        ObjectKey::new_from_str("").write_to(&mut buf).unwrap();
        assert_eq!(&buf, b"\"\"");
    }

    /// Builds a `serde_json::Value` with `json!` and encodes it canonically.
    macro_rules! encode {
        ($($tt:tt)+) => {
            (|v: serde_json::Value| -> Result<Vec<u8>> {
                v.to_canon_json_vec()
            })(serde_json::json!($($tt)+))
        };
    }

    /// Small fixed examples of arrays, objects and scalar members.
    #[test]
    fn securesystemslib_asserts() -> Result<()> {
        assert_eq!(encode!([1, 2, 3])?, b"[1,2,3]");
        assert_eq!(encode!([])?, b"[]");
        assert_eq!(encode!({})?, b"{}");
        assert_eq!(encode!({"A": [99]})?, br#"{"A":[99]}"#);
        assert_eq!(encode!({"A": true})?, br#"{"A":true}"#);
        assert_eq!(encode!({"B": false})?, br#"{"B":false}"#);
        assert_eq!(encode!({"x": 3, "y": 2})?, br#"{"x":3,"y":2}"#);
        assert_eq!(encode!({"x": 3, "y": null})?, br#"{"x":3,"y":null}"#);

        Ok(())
    }

    /// Keys must be sorted at every nesting level.
    #[test]
    fn ordered_nested_object() -> Result<()> {
        assert_eq!(
            encode!({
                "nested": {
                    "bad": true,
                    "good": false
                },
                "b": 2,
                "a": 1,
                "c": {
                    "h": {
                        "h": -5,
                        "i": 3
                    },
                    "a": null,
                    "x": {}
                }
            })?,
            br#"{"a":1,"b":2,"c":{"a":null,"h":{"h":-5,"i":3},"x":{}},"nested":{"bad":true,"good":false}}"#.to_vec(),
        );

        Ok(())
    }

    /// Encodes a TUF timestamp document and compares against the expected bytes.
    // The numeric literals are copied from real metadata; digit separators
    // would only make them harder to compare.
    #[allow(clippy::unreadable_literal)]
    #[test]
    fn actual_tuf_signed() {
        let encode_result = encode!(
        {
          "signed": {
            "_type": "timestamp",
            "spec_version": "1.0.0",
            "version": 1604605512,
            "expires": "2020-11-12T19:45:12.613154979Z",
            "meta": {
              "snapshot.json": {
                "length": 1278,
                "hashes": {
                  "sha256": "56c4ecc3b331f6154d9a5005f6e2978e4198cc8c3b79746c25a592043a2d83d4"
                },
                "version": 1604605512
              }
            }
          }
        }
        );

        let encoded = encode_result.unwrap();
        let expected: Vec<u8> = vec![
            123, 34, 115, 105, 103, 110, 101, 100, 34, 58, 123, 34, 95, 116, 121, 112, 101, 34, 58,
            34, 116, 105, 109, 101, 115, 116, 97, 109, 112, 34, 44, 34, 101, 120, 112, 105, 114,
            101, 115, 34, 58, 34, 50, 48, 50, 48, 45, 49, 49, 45, 49, 50, 84, 49, 57, 58, 52, 53,
            58, 49, 50, 46, 54, 49, 51, 49, 53, 52, 57, 55, 57, 90, 34, 44, 34, 109, 101, 116, 97,
            34, 58, 123, 34, 115, 110, 97, 112, 115, 104, 111, 116, 46, 106, 115, 111, 110, 34, 58,
            123, 34, 104, 97, 115, 104, 101, 115, 34, 58, 123, 34, 115, 104, 97, 50, 53, 54, 34,
            58, 34, 53, 54, 99, 52, 101, 99, 99, 51, 98, 51, 51, 49, 102, 54, 49, 53, 52, 100, 57,
            97, 53, 48, 48, 53, 102, 54, 101, 50, 57, 55, 56, 101, 52, 49, 57, 56, 99, 99, 56, 99,
            51, 98, 55, 57, 55, 52, 54, 99, 50, 53, 97, 53, 57, 50, 48, 52, 51, 97, 50, 100, 56,
            51, 100, 52, 34, 125, 44, 34, 108, 101, 110, 103, 116, 104, 34, 58, 49, 50, 55, 56, 44,
            34, 118, 101, 114, 115, 105, 111, 110, 34, 58, 49, 54, 48, 52, 54, 48, 53, 53, 49, 50,
            125, 125, 44, 34, 115, 112, 101, 99, 95, 118, 101, 114, 115, 105, 111, 110, 34, 58, 34,
            49, 46, 48, 46, 48, 34, 44, 34, 118, 101, 114, 115, 105, 111, 110, 34, 58, 49, 54, 48,
            52, 54, 48, 53, 53, 49, 50, 125, 125,
        ];
        assert_eq!(expected, encoded);
    }

    /// 128-bit integers are written as plain numbers.
    #[test]
    fn encode_u128_i128() {
        #[derive(serde_derive::Serialize)]
        struct Object {
            u128: u128,
            i128: i128,
        }

        let value = Object {
            u128: u128::MAX,
            i128: i128::MIN,
        };

        let expected = [
            123, 34, 105, 49, 50, 56, 34, 58, 45, 49, 55, 48, 49, 52, 49, 49, 56, 51, 52, 54, 48,
            52, 54, 57, 50, 51, 49, 55, 51, 49, 54, 56, 55, 51, 48, 51, 55, 49, 53, 56, 56, 52, 49,
            48, 53, 55, 50, 56, 44, 34, 117, 49, 50, 56, 34, 58, 51, 52, 48, 50, 56, 50, 51, 54,
            54, 57, 50, 48, 57, 51, 56, 52, 54, 51, 52, 54, 51, 51, 55, 52, 54, 48, 55, 52, 51, 49,
            55, 54, 56, 50, 49, 49, 52, 53, 53, 125,
        ];

        assert_eq!(value.to_canon_json_vec().unwrap(), expected);
    }

    /// For a one-member object the output equals plain `serde_json` output.
    #[test]
    fn test_basic() {
        let v = serde_json::json! { { "foo": "42" } };
        let expected = serde_json::to_string(&v).unwrap();
        let buf = String::from_utf8(encode!(v).unwrap()).unwrap();
        assert_eq!(&buf, &expected);
    }

    /// Strategy generating arbitrary JSON values.
    ///
    /// `keyspace` is the regex used for object keys and string values. With
    /// `allow_fp == false` every generated number is an integer in `u32` range.
    fn arbitrary_json(
        keyspace: &'static str,
        allow_fp: bool,
    ) -> impl Strategy<Value = serde_json::Value> {
        use serde_json::Value;
        let leaf = prop_oneof![
            Just(Value::Null),
            any::<f64>().prop_filter_map("valid f64 for JSON", move |v| {
                // `?` rejects values that have no JSON representation instead
                // of panicking on them.
                let n = if allow_fp && v.fract() != 0.0 {
                    Number::from_f64(v)?
                } else {
                    // Saturating cast keeps integers within `u32` range.
                    Number::from_u128(v as u32 as u128)?
                };
                Some(Value::Number(n))
            }),
            any::<bool>().prop_map(Value::Bool),
            keyspace.prop_map(Value::String),
        ];
        // Arguments: depth, desired size, expected branch size, recursion step.
        leaf.prop_recursive(8, 256, 10, move |inner| {
            prop_oneof![
                prop::collection::vec(inner.clone(), 0..10).prop_map(Value::Array),
                prop::collection::hash_map(keyspace, inner, 0..10)
                    .prop_map(|v| { v.into_iter().collect() }),
            ]
        })
    }

    proptest! {
        /// Encoding and re-parsing must give back the original value.
        #[test]
        fn roundtrip_rfc8785(v in arbitrary_json(".*", true)) {
            let buf = encode!(&v).unwrap();
            let v2: serde_json::Value = serde_json::from_slice(&buf).unwrap_or_else(|e| {
                panic!("Failed to parse {v:?} -> {}: {e}", String::from_utf8_lossy(&buf))
            });
            assert_eq!(&v, &v2);
        }
    }

    /// Parses `input` as JSON and asserts that its canonical form equals `expected`.
    fn verify(input: &str, expected: &str) {
        let input: serde_json::Value = serde_json::from_str(input).unwrap();
        assert_eq!(expected, input.to_canon_json_string().unwrap());
    }

    #[test]
    fn test_arrays() {
        verify(
            include_str!("../testdata/input/arrays.json"),
            include_str!("../testdata/output/arrays.json"),
        );
    }

    #[test]
    fn test_french() {
        verify(
            include_str!("../testdata/input/french.json"),
            include_str!("../testdata/output/french.json"),
        );
    }

    #[test]
    fn test_structures() {
        verify(
            include_str!("../testdata/input/structures.json"),
            include_str!("../testdata/output/structures.json"),
        );
    }

    #[test]
    fn test_unicode() {
        verify(
            include_str!("../testdata/input/unicode.json"),
            include_str!("../testdata/output/unicode.json"),
        );
    }

    #[test]
    fn test_values() {
        verify(
            include_str!("../testdata/input/values.json"),
            include_str!("../testdata/output/values.json"),
        );
    }

    #[test]
    fn test_weird() {
        verify(
            include_str!("../testdata/input/weird.json"),
            include_str!("../testdata/output/weird.json"),
        );
    }

    /// Every file in `testdata-cjson-orig` is named after the SHA-256 of its
    /// canonical encoding followed by a newline.
    #[test]
    fn test_from_testdata() -> Result<()> {
        let amb = cap_std::ambient_authority();
        let root =
            cap_std::fs::Dir::open_ambient_dir(std::env::var("CARGO_MANIFEST_DIR").unwrap(), amb)?;
        let dir = root.open_dir("testdata-cjson-orig")?;
        for entry in dir.entries()? {
            let entry = entry?;
            let filename = entry.file_name();
            let filename = filename.to_str().unwrap();
            // These entries are not test vectors.
            if matches!(filename, "errors" | "LICENSE") {
                continue;
            }

            let json: serde_json::Value = serde_json::from_reader(entry.open()?)?;
            let enc = encode!(json)?;
            let mut sha256 = Sha256::new();
            sha256.update(&enc);
            // The newline is part of the hashed content.
            sha256.update("\n");
            let filename = filename.trim_end_matches(".json");
            let hash = format!("{:x}", sha256.finalize());
            assert_eq!(filename, hash);
            let json2: serde_json::Value = serde_json::from_slice(&enc)?;

            assert_eq!(json, json2);
        }

        Ok(())
    }

    /// Key/string regex restricted to ASCII letters and digits.
    const ASCII_ALPHANUMERIC: &str = r"[a-zA-Z0-9]*";

    proptest! {
        /// Output must match `olpc_cjson` for ASCII-only, integer-only documents.
        #[test]
        fn crosscheck_olpc_cjson_ascii(v in arbitrary_json(ASCII_ALPHANUMERIC, false)) {
            let canon_json = String::from_utf8(encode!(&v).unwrap()).unwrap();
            let mut olpc_cjson_serialized = Vec::new();
            let mut ser = serde_json::Serializer::with_formatter(&mut olpc_cjson_serialized, olpc_cjson::CanonicalFormatter::new());
            v.serialize(&mut ser).unwrap();
            assert_eq!(canon_json, String::from_utf8(olpc_cjson_serialized).unwrap());
        }
    }

    proptest! {
        /// Output must match `cjson` for ASCII-only, integer-only documents.
        #[test]
        fn crosscheck_cjson_ascii(v in arbitrary_json(ASCII_ALPHANUMERIC, false)) {
            let canon_json = String::from_utf8(encode!(&v).unwrap()).unwrap();
            let cjson = String::from_utf8(cjson::to_vec(&v).unwrap()).unwrap();
            assert_eq!(canon_json, cjson);
        }

        /// For arbitrary strings only the re-parsed values are compared, not
        /// the encoded bytes.
        #[test]
        fn crosscheck_cjson(v in arbitrary_json(".*", false)) {
            let buf = encode!(&v).unwrap();
            let self_reparsed = serde_json::from_slice::<serde_json::Value>(&buf).unwrap();
            let buf = cjson::to_vec(&v).unwrap();
            let cjson_reparsed = serde_json::from_slice::<serde_json::Value>(&buf).unwrap();
            assert_eq!(self_reparsed, v);
            assert_eq!(cjson_reparsed, v);
        }
    }
}