1#![doc = include_str!("../README.md")]
5#![forbid(unsafe_code)]
6
7mod floatformat;
8
9use std::collections::BTreeMap;
10use std::io::{Error, ErrorKind, Result, Write};
11
12use serde::Serialize;
13use serde_json::ser::{CharEscape, CompactFormatter, Formatter, Serializer};
14
/// A `serde_json` [`Formatter`] that buffers the members of every object and
/// writes them out sorted by key once the object is closed.
///
/// Everything that is not inside an object is forwarded straight to the
/// underlying writer using `CompactFormatter`; floating point numbers are
/// rendered by this crate's `floatformat` module.
#[derive(Debug, Default)]
pub struct CanonicalFormatter {
    /// Objects that have been opened but not yet closed, innermost last.
    object_stack: Vec<Object>,
}
24
25#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
27struct ObjectKey(Vec<u16>);
28
29impl ObjectKey {
30 fn new_from_str(s: &str) -> Self {
31 Self(s.encode_utf16().collect())
32 }
33
34 fn new_from_bytes(v: &[u8]) -> Result<Self> {
35 let s = std::str::from_utf8(v)
36 .map_err(|e| Error::new(ErrorKind::InvalidData, format!("Expected UTF-8 key: {e}")))?;
37 Ok(Self::new_from_str(s))
38 }
39
40 fn as_string(&self) -> Result<String> {
41 std::char::decode_utf16(self.0.iter().copied()).try_fold(String::new(), |mut acc, c| {
42 let c = c.map_err(|_| Error::new(ErrorKind::InvalidData, "Expected UTF-8 key"))?;
43 acc.push(c);
44 Ok(acc)
45 })
46 }
47
48 fn write_to<W: Write>(&self, w: W) -> Result<()> {
50 let s = self.as_string()?;
51 let val = serde_json::Value::String(s);
52 let mut s = Serializer::new(w);
53 val.serialize(&mut s).map_err(|e| {
54 let kind = e.io_error_kind().unwrap();
55 Error::new(kind, "I/O error")
56 })
57 }
58}
59
/// Buffered state for one JSON object that is currently being serialized.
#[derive(Debug, Default)]
struct Object {
    /// Completed members: key mapped to the already-serialized bytes of its value.
    obj: BTreeMap<ObjectKey, Vec<u8>>,
    /// Bytes of the key currently being written (unescaped, without quotes).
    next_key: Vec<u8>,
    /// Serialized bytes of the value currently being written.
    next_value: Vec<u8>,
    /// `false` while a key is being written, `true` once `end_object_key` has
    /// been called and writes belong to the value.
    key_done: bool,
}
86
/// Destination for formatter output: either the caller's writer or one of the
/// in-memory buffers belonging to the object currently being assembled.
enum WriterTarget<'w, W> {
    /// Write straight through to the wrapped writer.
    Underlying(W),
    /// Append to a borrowed byte buffer.
    Buffer(&'w mut Vec<u8>),
}

impl<W: Write> Write for WriterTarget<'_, W> {
    /// Forward `buf` to whichever destination this target wraps.
    ///
    /// The buffer variant always accepts the whole slice.
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        match self {
            // `Vec<u8>`'s own `Write` impl appends the slice and reports its length.
            Self::Buffer(pending) => pending.write(buf),
            Self::Underlying(inner) => inner.write(buf),
        }
    }

    /// Flush the wrapped writer; flushing an in-memory buffer is a no-op.
    fn flush(&mut self) -> Result<()> {
        match self {
            Self::Buffer(pending) => pending.flush(),
            Self::Underlying(inner) => inner.flush(),
        }
    }
}
114
115impl CanonicalFormatter {
116 pub fn new() -> Self {
118 Self::default()
119 }
120
121 fn writer<'a, W: Write + ?Sized>(
129 &'a mut self,
130 writer: &'a mut W,
131 ) -> WriterTarget<'a, &'a mut W> {
132 self.writer_or_key(writer, false).0
133 }
134
135 fn writer_or_key<'a, W: Write + ?Sized>(
138 &'a mut self,
139 writer: &'a mut W,
140 object_key_allowed: bool,
141 ) -> (WriterTarget<'a, &'a mut W>, bool) {
142 self.object_stack
143 .last_mut()
144 .map_or((WriterTarget::Underlying(writer), false), |object| {
145 let r = if object.key_done {
146 &mut object.next_value
147 } else if !object_key_allowed {
148 panic!("Unhandled write into object key");
149 } else {
150 &mut object.next_key
151 };
152 (WriterTarget::Buffer(r), !object.key_done)
153 })
154 }
155
156 fn obj_mut(&mut self) -> Result<&mut Object> {
158 self.object_stack.last_mut().ok_or_else(|| {
159 Error::new(
160 ErrorKind::Other,
161 "serde_json called an object method without calling begin_object first",
162 )
163 })
164 }
165}
166
/// Generate a `Formatter` method that delegates to the method of the same name
/// on `CompactFormatter`, writing into whatever target `self.writer` selects.
///
/// `wrapper!(name)` produces a method taking only the writer;
/// `wrapper!(name, Type)` produces one that also forwards a single value.
macro_rules! wrapper {
    ($method:ident, $value_ty:ty) => {
        fn $method<W: Write + ?Sized>(&mut self, writer: &mut W, value: $value_ty) -> Result<()> {
            let mut target = self.writer(writer);
            CompactFormatter.$method(&mut target, value)
        }
    };

    ($method:ident) => {
        fn $method<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
            let mut target = self.writer(writer);
            CompactFormatter.$method(&mut target)
        }
    };
}
182
183impl Formatter for CanonicalFormatter {
184 wrapper!(write_null);
185 wrapper!(write_bool, bool);
186 wrapper!(write_i8, i8);
187 wrapper!(write_i16, i16);
188 wrapper!(write_i32, i32);
189 wrapper!(write_i64, i64);
190 wrapper!(write_i128, i128);
191 wrapper!(write_u8, u8);
192 wrapper!(write_u16, u16);
193 wrapper!(write_u32, u32);
194 wrapper!(write_u64, u64);
195 wrapper!(write_u128, u128);
196
197 fn write_f32<W: Write + ?Sized>(&mut self, writer: &mut W, value: f32) -> Result<()> {
198 self.write_f64(writer, value.into())
199 }
200
201 fn write_f64<W: Write + ?Sized>(&mut self, writer: &mut W, value: f64) -> Result<()> {
202 let v = floatformat::number_to_json(value).map_err(|e| {
203 Error::new(
204 ErrorKind::InvalidData,
205 format!("Unhandled floating point value {e}"),
206 )
207 })?;
208 CompactFormatter.write_string_fragment(&mut self.writer(writer), &v)
209 }
210
211 fn write_number_str<W: Write + ?Sized>(&mut self, writer: &mut W, value: &str) -> Result<()> {
215 CompactFormatter.write_number_str(&mut self.writer(writer), value)
216 }
217
218 fn begin_string<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
219 let Some(v) = self.object_stack.last_mut() else {
220 return CompactFormatter.begin_string(writer);
221 };
222 if !v.key_done {
223 return Ok(());
224 }
225 CompactFormatter.begin_string(&mut v.next_value)
226 }
227
228 fn end_string<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
229 let Some(v) = self.object_stack.last_mut() else {
230 return CompactFormatter.end_string(writer);
231 };
232 if !v.key_done {
233 return Ok(());
234 }
235 CompactFormatter.end_string(&mut v.next_value)
236 }
237
238 fn write_string_fragment<W: Write + ?Sized>(
239 &mut self,
240 writer: &mut W,
241 fragment: &str,
242 ) -> Result<()> {
243 let (mut writer, in_key) = self.writer_or_key(writer, true);
244 if in_key {
245 writer.write_all(fragment.as_bytes())
246 } else {
247 CompactFormatter.write_string_fragment(&mut writer, fragment)
248 }
249 }
250
251 fn write_char_escape<W: Write + ?Sized>(
252 &mut self,
253 writer: &mut W,
254 char_escape: CharEscape,
255 ) -> Result<()> {
256 let (mut writer, in_key) = self.writer_or_key(writer, true);
257 if in_key {
258 let v = match char_escape {
259 CharEscape::Quote => b"\"",
260 CharEscape::ReverseSolidus => b"\\",
261 CharEscape::Solidus => b"/",
262 CharEscape::Backspace => b"\x08",
263 CharEscape::FormFeed => b"\x0C",
264 CharEscape::LineFeed => b"\n",
265 CharEscape::CarriageReturn => b"\r",
266 CharEscape::Tab => b"\t",
267 CharEscape::AsciiControl(c) => &[c],
268 };
269 writer.write_all(v)
270 } else {
271 CompactFormatter.write_char_escape(&mut writer, char_escape)
272 }
273 }
274
275 wrapper!(begin_array);
276 wrapper!(end_array);
277 wrapper!(begin_array_value, bool); wrapper!(end_array_value);
279
280 fn begin_object<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
284 CompactFormatter.begin_object(&mut self.writer(writer))?;
285 self.object_stack.push(Object::default());
286 Ok(())
287 }
288
289 fn end_object<W: Write + ?Sized>(&mut self, writer: &mut W) -> Result<()> {
290 let object = self.object_stack.pop().ok_or_else(|| {
291 Error::new(
292 ErrorKind::Other,
293 "serde_json called Formatter::end_object object method
294 without calling begin_object first",
295 )
296 })?;
297 let mut writer = self.writer(writer);
298 let mut first = true;
299
300 for (key, value) in object.obj {
301 CompactFormatter.begin_object_key(&mut writer, first)?;
302 key.write_to(&mut writer)?;
303 CompactFormatter.end_object_key(&mut writer)?;
304
305 CompactFormatter.begin_object_value(&mut writer)?;
306 writer.write_all(&value)?;
307 CompactFormatter.end_object_value(&mut writer)?;
308
309 first = false;
310 }
311
312 CompactFormatter.end_object(&mut writer)
313 }
314
315 fn begin_object_key<W: Write + ?Sized>(&mut self, _writer: &mut W, _first: bool) -> Result<()> {
316 let object = self.obj_mut()?;
317 object.key_done = false;
318 Ok(())
319 }
320
321 fn end_object_key<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
322 let object = self.obj_mut()?;
323 object.key_done = true;
324 Ok(())
325 }
326
327 fn begin_object_value<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
328 Ok(())
329 }
330
331 fn end_object_value<W: Write + ?Sized>(&mut self, _writer: &mut W) -> Result<()> {
332 let object = self.obj_mut()?;
333 let key = std::mem::take(&mut object.next_key);
334 let value = std::mem::take(&mut object.next_value);
335 object.obj.insert(ObjectKey::new_from_bytes(&key)?, value);
337 Ok(())
338 }
339
340 fn write_raw_fragment<W: Write + ?Sized>(
344 &mut self,
345 writer: &mut W,
346 fragment: &str,
347 ) -> Result<()> {
348 let mut ser = Serializer::with_formatter(self.writer(writer), Self::new());
349 serde_json::from_str::<serde_json::Value>(fragment)?.serialize(&mut ser)?;
350 Ok(())
351 }
352}
353
#[cfg(test)]
mod tests {
    use super::*;

    use std::{cmp::Ordering, io::Result};

    use proptest::prelude::*;
    use serde::Serialize;
    use serde_json::{Number, Serializer};
    use sha2::{Digest, Sha256};
    use similar_asserts::assert_eq;

    /// Ordering and output of `ObjectKey`.
    #[test]
    fn test_object_key() {
        let cases = [("\n", "1"), ("\r", "<script>"), ("ö", "דּ")];
        for case in cases {
            assert_eq!(case.0.cmp(case.1), Ordering::Less);
            // `ObjectKey` must agree with plain string ordering for these pairs.
            assert_eq!(
                ObjectKey::new_from_str(case.0).cmp(&ObjectKey::new_from_str(case.1)),
                Ordering::Less
            );
        }
        let mut v = cases
            .iter()
            .flat_map(|v| [v.0, v.1])
            .collect::<std::collections::BTreeSet<_>>()
            .into_iter();
        assert_eq!(v.next().unwrap(), "\n");
        assert_eq!(v.next().unwrap(), "\r");
        assert_eq!(v.next().unwrap(), "1");
        assert_eq!(v.next().unwrap(), "<script>");
        assert_eq!(v.next().unwrap(), "ö");
        assert_eq!(v.next().unwrap(), "דּ");

        // A code point outside the BMP sorts *after* U+FB33 as a `str`
        // (UTF-8 bytes F0.. vs EF..) but *before* it as UTF-16 code units
        // (D83D DE00 vs FB33). `ObjectKey` has to follow the UTF-16 order.
        let (astral, bmp) = ("\u{1f600}", "\u{fb33}");
        assert_eq!(astral.cmp(bmp), Ordering::Greater);
        assert_eq!(
            ObjectKey::new_from_str(astral).cmp(&ObjectKey::new_from_str(bmp)),
            Ordering::Less
        );

        let mut buf = Vec::new();
        ObjectKey::new_from_str("").write_to(&mut buf).unwrap();
        assert_eq!(&buf, b"\"\"");
    }

    /// Serialize a `serde_json::json!` literal with `CanonicalFormatter` and
    /// return the resulting bytes as an `io::Result<Vec<u8>>`.
    macro_rules! encode {
        ($($tt:tt)+) => {
            (|v: serde_json::Value| -> Result<Vec<u8>> {
                let mut buf = Vec::new();
                let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
                v.serialize(&mut ser)?;
                Ok(buf)
            })(serde_json::json!($($tt)+))
        };
    }

    /// Small fixed inputs with hand-written expected output.
    #[test]
    fn securesystemslib_asserts() -> Result<()> {
        assert_eq!(encode!([1, 2, 3])?, b"[1,2,3]");
        assert_eq!(encode!([1, 2, 3])?, b"[1,2,3]");
        assert_eq!(encode!([])?, b"[]");
        assert_eq!(encode!({})?, b"{}");
        assert_eq!(encode!({"A": [99]})?, br#"{"A":[99]}"#);
        assert_eq!(encode!({"A": true})?, br#"{"A":true}"#);
        assert_eq!(encode!({"B": false})?, br#"{"B":false}"#);
        assert_eq!(encode!({"x": 3, "y": 2})?, br#"{"x":3,"y":2}"#);
        assert_eq!(encode!({"x": 3, "y": null})?, br#"{"x":3,"y":null}"#);

        Ok(())
    }

    /// Keys are sorted at every nesting level.
    #[test]
    fn ordered_nested_object() -> Result<()> {
        assert_eq!(
            encode!({
                "nested": {
                    "bad": true,
                    "good": false
                },
                "b": 2,
                "a": 1,
                "c": {
                    "h": {
                        "h": -5,
                        "i": 3
                    },
                    "a": null,
                    "x": {}
                }
            })?,
            br#"{"a":1,"b":2,"c":{"a":null,"h":{"h":-5,"i":3},"x":{}},"nested":{"bad":true,"good":false}}"#.to_vec(),
        );

        Ok(())
    }

    // The fixture below contains long numeric literals such as 1604605512.
    #[allow(clippy::unreadable_literal)]
    #[test]
    fn actual_tuf_signed() {
        let encode_result = encode!(
        {
            "signed": {
                "_type": "timestamp",
                "spec_version": "1.0.0",
                "version": 1604605512,
                "expires": "2020-11-12T19:45:12.613154979Z",
                "meta": {
                    "snapshot.json": {
                        "length": 1278,
                        "hashes": {
                            "sha256": "56c4ecc3b331f6154d9a5005f6e2978e4198cc8c3b79746c25a592043a2d83d4"
                        },
                        "version": 1604605512
                    }
                }
            }
        }
        );

        let encoded = encode_result.unwrap();
        let expected: Vec<u8> = vec![
            123, 34, 115, 105, 103, 110, 101, 100, 34, 58, 123, 34, 95, 116, 121, 112, 101, 34, 58,
            34, 116, 105, 109, 101, 115, 116, 97, 109, 112, 34, 44, 34, 101, 120, 112, 105, 114,
            101, 115, 34, 58, 34, 50, 48, 50, 48, 45, 49, 49, 45, 49, 50, 84, 49, 57, 58, 52, 53,
            58, 49, 50, 46, 54, 49, 51, 49, 53, 52, 57, 55, 57, 90, 34, 44, 34, 109, 101, 116, 97,
            34, 58, 123, 34, 115, 110, 97, 112, 115, 104, 111, 116, 46, 106, 115, 111, 110, 34, 58,
            123, 34, 104, 97, 115, 104, 101, 115, 34, 58, 123, 34, 115, 104, 97, 50, 53, 54, 34,
            58, 34, 53, 54, 99, 52, 101, 99, 99, 51, 98, 51, 51, 49, 102, 54, 49, 53, 52, 100, 57,
            97, 53, 48, 48, 53, 102, 54, 101, 50, 57, 55, 56, 101, 52, 49, 57, 56, 99, 99, 56, 99,
            51, 98, 55, 57, 55, 52, 54, 99, 50, 53, 97, 53, 57, 50, 48, 52, 51, 97, 50, 100, 56,
            51, 100, 52, 34, 125, 44, 34, 108, 101, 110, 103, 116, 104, 34, 58, 49, 50, 55, 56, 44,
            34, 118, 101, 114, 115, 105, 111, 110, 34, 58, 49, 54, 48, 52, 54, 48, 53, 53, 49, 50,
            125, 125, 44, 34, 115, 112, 101, 99, 95, 118, 101, 114, 115, 105, 111, 110, 34, 58, 34,
            49, 46, 48, 46, 48, 34, 44, 34, 118, 101, 114, 115, 105, 111, 110, 34, 58, 49, 54, 48,
            52, 54, 48, 53, 53, 49, 50, 125, 125,
        ];
        assert_eq!(expected, encoded);
    }

    /// 128-bit integers are written as plain numbers and the fields are sorted.
    #[test]
    fn encode_u128_i128() {
        #[derive(serde_derive::Serialize)]
        struct Object {
            u128: u128,
            i128: i128,
        }

        let value = Object {
            u128: u128::MAX,
            i128: i128::MIN,
        };

        let mut buf = Vec::new();
        let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
        value.serialize(&mut ser).unwrap();

        let expected = [
            123, 34, 105, 49, 50, 56, 34, 58, 45, 49, 55, 48, 49, 52, 49, 49, 56, 51, 52, 54, 48,
            52, 54, 57, 50, 51, 49, 55, 51, 49, 54, 56, 55, 51, 48, 51, 55, 49, 53, 56, 56, 52, 49,
            48, 53, 55, 50, 56, 44, 34, 117, 49, 50, 56, 34, 58, 51, 52, 48, 50, 56, 50, 51, 54,
            54, 57, 50, 48, 57, 51, 56, 52, 54, 51, 52, 54, 51, 51, 55, 52, 54, 48, 55, 52, 51, 49,
            55, 54, 56, 50, 49, 49, 52, 53, 53, 125,
        ];

        assert_eq!(buf, expected);
    }

    /// A single-member object matches `serde_json::to_string`.
    #[test]
    fn test_basic() {
        let v = serde_json::json! { { "foo": "42" } };
        let expected = serde_json::to_string(&v).unwrap();
        let buf = String::from_utf8(encode!(v).unwrap()).unwrap();
        assert_eq!(&buf, &expected);
    }

    /// Strategy producing arbitrary JSON values built from null, `u32`
    /// numbers, booleans, strings, arrays and objects.
    fn arbitrary_json() -> impl Strategy<Value = serde_json::Value> {
        use serde_json::Value;
        const S: &str = ".*";
        let leaf = prop_oneof![
            Just(Value::Null),
            any::<u32>().prop_map(|v| Value::Number(Number::from_u128(v.into()).unwrap())),
            any::<bool>().prop_map(Value::Bool),
            S.prop_map(Value::String),
        ];
        // Arguments follow proptest's `prop_recursive(depth, desired_size,
        // expected_branch_size, recurse)`.
        leaf.prop_recursive(8, 256, 10, |inner| {
            prop_oneof![
                prop::collection::vec(inner.clone(), 0..10).prop_map(Value::Array),
                prop::collection::hash_map(S, inner, 0..10)
                    .prop_map(|v| { v.into_iter().collect() }),
            ]
        })
    }

    proptest! {
        /// Canonical output parses back to the value it was produced from.
        #[test]
        fn roundtrip_rfc8785(v in arbitrary_json()) {
            let buf = encode!(&v).unwrap();
            let v2: serde_json::Value = serde_json::from_slice(&buf)
                .map_err(|e| format!("Failed to parse {v:?} -> {}: {e}", String::from_utf8_lossy(&buf))).unwrap();
            assert_eq!(&v, &v2);
        }
    }

    /// Parse `input`, serialize it canonically and compare with `expected`.
    fn verify(input: &str, expected: &str) {
        let input: serde_json::Value = serde_json::from_str(input).unwrap();
        let mut buf = Vec::new();
        let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
        input.serialize(&mut ser).unwrap();
        let buf = String::from_utf8(buf).unwrap();
        assert_eq!(expected, &buf);
    }

    #[test]
    fn test_arrays() {
        verify(
            include_str!("../testdata/input/arrays.json"),
            include_str!("../testdata/output/arrays.json"),
        );
    }

    #[test]
    fn test_french() {
        verify(
            include_str!("../testdata/input/french.json"),
            include_str!("../testdata/output/french.json"),
        );
    }

    #[test]
    fn test_structures() {
        verify(
            include_str!("../testdata/input/structures.json"),
            include_str!("../testdata/output/structures.json"),
        );
    }

    #[test]
    fn test_unicode() {
        verify(
            include_str!("../testdata/input/unicode.json"),
            include_str!("../testdata/output/unicode.json"),
        );
    }

    #[test]
    fn test_values() {
        verify(
            include_str!("../testdata/input/values.json"),
            include_str!("../testdata/output/values.json"),
        );
    }

    #[test]
    fn test_weird() {
        verify(
            include_str!("../testdata/input/weird.json"),
            include_str!("../testdata/output/weird.json"),
        );
    }

    /// Every file in `testdata-cjson-orig` (other than `errors` and `LICENSE`)
    /// must hash to its own file name and round-trip through the encoder.
    #[test]
    fn test_from_testdata() -> Result<()> {
        let amb = cap_std::ambient_authority();
        let root =
            cap_std::fs::Dir::open_ambient_dir(std::env::var("CARGO_MANIFEST_DIR").unwrap(), amb)?;
        let dir = root.open_dir("testdata-cjson-orig")?;
        for entry in dir.entries()? {
            let entry = entry?;
            let filename = entry.file_name();
            let filename = filename.to_str().unwrap();
            match filename {
                "errors" => continue,
                "LICENSE" => continue,
                _ => {}
            }

            let json: serde_json::Value = serde_json::from_reader(entry.open()?)?;
            let enc = encode!(json)?;
            let mut sha256 = Sha256::new();
            sha256.update(&enc);
            // The expected digest covers the encoded bytes plus one trailing
            // newline (presumably how the fixture names were generated).
            sha256.update("\n");
            let filename = filename.trim_end_matches(".json");
            let hash = format!("{:x}", sha256.finalize());
            assert_eq!(filename, hash);
            let json2: serde_json::Value = serde_json::from_slice(&enc)?;

            assert_eq!(json, json2);
        }

        Ok(())
    }

    proptest! {
        /// Whenever `olpc_cjson` can serialize a value, this crate must
        /// produce byte-identical output.
        #[test]
        fn crosscheck_olpc_cjson(v in arbitrary_json()) {
            use olpc_cjson::CanonicalFormatter;

            let mut olpc_cjson_serialized = Vec::new();
            let mut ser = serde_json::Serializer::with_formatter(&mut olpc_cjson_serialized, CanonicalFormatter::new());
            prop_assume!(v.serialize(&mut ser).is_ok());

            let buf = encode!(&v).unwrap();
            assert_eq!(buf, olpc_cjson_serialized);
        }
    }
}