1use std::cmp::Ordering;
70use std::collections::BTreeSet;
71use std::fmt;
72
73use serde::de::{self, DeserializeSeed, Error as DeError, MapAccess, SeqAccess, Visitor};
74use serde::{Deserializer, Serialize};
75use serde_json::{Number, Value};
76
/// Deepest nesting level accepted by this module.
///
/// The parser seed, [`canonicalize`], and the emitter all compare their current
/// depth against this value and fail with a nesting-depth error once the depth
/// is strictly greater than it.
pub const MAX_NESTING_DEPTH: usize = 128;
79
80mod error;
81pub use error::{JcsError, JcsErrorInfo};
82
83#[deprecated(
99 since = "0.3.0",
100 note = "use to_canon_bytes_from_slice for untrusted input; see PUBLIC_SURFACE.md"
101)]
102pub fn to_canon_bytes<T: Serialize>(value: &T) -> Result<Vec<u8>, JcsError> {
103 let value = serde_json::to_value(value)?;
104 to_canon_bytes_value(&value)
105}
106
107#[deprecated(
117 since = "0.3.0",
118 note = "use to_canon_string_from_str for untrusted input; see PUBLIC_SURFACE.md"
119)]
120pub fn to_canon_string<T: Serialize>(value: &T) -> Result<String, JcsError> {
121 let value = serde_json::to_value(value)?;
122 let bytes = to_canon_bytes_value(&value)?;
123 String::from_utf8(bytes).map_err(|error| {
124 JcsError::InvalidString(format!(
125 "canonical JSON output was not valid UTF-8: {error}"
126 ))
127 })
128}
129
130pub fn to_canon_bytes_from_slice(json: &[u8]) -> Result<Vec<u8>, JcsError> {
143 let value = parse_json_value_no_duplicates(json)?;
144 to_canon_bytes_value(&value)
145}
146
147pub fn to_canon_string_from_str(json: &str) -> Result<String, JcsError> {
154 let bytes = to_canon_bytes_from_slice(json.as_bytes())?;
155 String::from_utf8(bytes).map_err(|error| {
156 JcsError::InvalidString(format!(
157 "canonical JSON output was not valid UTF-8: {error}"
158 ))
159 })
160}
161
/// Owned byte buffer produced by this module's canonical emitter.
///
/// The only constructor is crate-private, so code outside the crate can obtain
/// one solely through the functions that canonicalize input.
#[derive(Clone, PartialEq, Eq)]
pub struct CanonicalBytes(Vec<u8>);

impl CanonicalBytes {
    /// Wraps bytes that the caller vouches for as canonical output.
    pub(crate) const fn from_jcs(bytes: Vec<u8>) -> Self {
        CanonicalBytes(bytes)
    }

    /// Borrows the canonical bytes.
    #[must_use]
    pub fn as_slice(&self) -> &[u8] {
        self.0.as_slice()
    }

    /// Number of canonical bytes held.
    #[must_use]
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }

    /// `true` when no bytes are held.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.as_slice().is_empty()
    }

    /// Consumes the wrapper and hands back the underlying buffer.
    #[must_use]
    pub fn into_vec(self) -> Vec<u8> {
        let Self(bytes) = self;
        bytes
    }
}

impl fmt::Debug for CanonicalBytes {
    /// Prints only the byte count; the payload itself is left out.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let byte_count = self.len();
        let mut builder = f.debug_struct("CanonicalBytes");
        builder.field("len", &byte_count);
        builder.finish_non_exhaustive()
    }
}
224
225pub fn canonical_bytes_from_slice(json: &[u8]) -> Result<CanonicalBytes, JcsError> {
236 to_canon_bytes_from_slice(json).map(CanonicalBytes::from_jcs)
237}
238
/// Hash construction applied to canonical bytes.
///
/// `Debug` is implemented by hand so that the secret carried by
/// [`DigestAlgorithm::Blake3Keyed`] never ends up in logs or panic messages.
/// The other variants print exactly what a derived `Debug` would print.
#[derive(Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum DigestAlgorithm {
    /// Plain BLAKE3 hash of the canonical bytes.
    Blake3Untagged,
    /// BLAKE3 keyed hash of the canonical bytes.
    Blake3Keyed {
        /// 32-byte key passed to the keyed hash. Treated as secret material.
        key: [u8; 32],
    },
    /// BLAKE3 key-derivation mode, using `context` as the derivation context.
    Blake3DomainSeparated {
        /// Context string handed to the key-derivation function.
        context: String,
    },
    /// SHA-256 over the canonical bytes.
    Sha256,
}

impl DigestAlgorithm {
    /// Stable lowercase identifier for the algorithm, independent of any
    /// key or context it carries.
    #[must_use]
    pub const fn name(&self) -> &'static str {
        match self {
            Self::Blake3Untagged => "blake3-untagged",
            Self::Blake3Keyed { .. } => "blake3-keyed",
            Self::Blake3DomainSeparated { .. } => "blake3-domain-separated",
            Self::Sha256 => "sha256",
        }
    }
}

impl fmt::Debug for DigestAlgorithm {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Blake3Untagged => f.write_str("Blake3Untagged"),
            // Never print key bytes: Debug output routinely lands in logs.
            Self::Blake3Keyed { .. } => f
                .debug_struct("Blake3Keyed")
                .field("key", &format_args!("<redacted>"))
                .finish(),
            Self::Blake3DomainSeparated { context } => f
                .debug_struct("Blake3DomainSeparated")
                .field("context", context)
                .finish(),
            Self::Sha256 => f.write_str("Sha256"),
        }
    }
}
286
287#[derive(Clone, Debug, PartialEq, Eq)]
291pub struct DigestStrategy {
292 pub algorithm: DigestAlgorithm,
294}
295
296impl DigestStrategy {
297 #[must_use]
299 pub const fn blake3_untagged() -> Self {
300 Self {
301 algorithm: DigestAlgorithm::Blake3Untagged,
302 }
303 }
304
305 #[must_use]
307 pub const fn blake3_keyed(key: [u8; 32]) -> Self {
308 Self {
309 algorithm: DigestAlgorithm::Blake3Keyed { key },
310 }
311 }
312
313 #[must_use]
315 pub fn blake3_domain_separated(context: impl Into<String>) -> Self {
316 Self {
317 algorithm: DigestAlgorithm::Blake3DomainSeparated {
318 context: context.into(),
319 },
320 }
321 }
322
323 #[must_use]
327 pub const fn sha256() -> Self {
328 Self {
329 algorithm: DigestAlgorithm::Sha256,
330 }
331 }
332}
333
/// Digest bytes together with the algorithm that produced them.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CanonicalDigest {
    /// Algorithm used for the digest. `to_canon_digest_with` stores a clone of
    /// the strategy's algorithm here, including any key or context it carries.
    pub algorithm: DigestAlgorithm,
    /// Raw digest output.
    pub bytes: Vec<u8>,
}
347
348pub fn to_canon_digest_with(
366 value: &Value,
367 strategy: &DigestStrategy,
368) -> Result<CanonicalDigest, JcsError> {
369 let bytes = to_canon_bytes_value(value)?;
370 let digest_bytes = match &strategy.algorithm {
371 DigestAlgorithm::Blake3Untagged => blake3::hash(&bytes).as_bytes().to_vec(),
372 DigestAlgorithm::Blake3Keyed { key } => blake3::keyed_hash(key, &bytes).as_bytes().to_vec(),
373 DigestAlgorithm::Blake3DomainSeparated { context } => {
374 blake3::derive_key(context, &bytes).to_vec()
377 }
378 DigestAlgorithm::Sha256 => {
379 return Err(JcsError::UnsupportedAlgorithm(
380 "SHA-256 over canonical bytes is declared in the API but not \
381 wired in this build; open a follow-up to add the sha2 dep"
382 .to_string(),
383 ));
384 }
385 };
386 Ok(CanonicalDigest {
387 algorithm: strategy.algorithm.clone(),
388 bytes: digest_bytes,
389 })
390}
391
392pub fn to_canon_blake3_digest(value: &Value) -> Result<[u8; 32], JcsError> {
403 let bytes = to_canon_bytes_value(value)?;
404 Ok(*blake3::hash(&bytes).as_bytes())
405}
406
407pub fn to_canon_blake3_digest_from_slice(json: &[u8]) -> Result<[u8; 32], JcsError> {
416 let bytes = to_canon_bytes_from_slice(json)?;
417 Ok(*blake3::hash(&bytes).as_bytes())
418}
419
420pub fn canonicalize(v: &mut Value) -> Result<(), JcsError> {
434 canonicalize_depth(v, 0)
435}
436
437fn canonicalize_depth(v: &mut Value, depth: usize) -> Result<(), JcsError> {
438 if depth > MAX_NESTING_DEPTH {
439 return Err(JcsError::NestingDepthExceeded);
440 }
441 match v {
442 Value::Object(map) => {
443 let mut entries: Vec<(String, Value)> = std::mem::take(map).into_iter().collect();
444 entries.sort_by(|(a, _), (b, _)| cmp_utf16(a, b));
445 for (key, mut value) in entries {
446 canonicalize_depth(&mut value, depth + 1)?;
447 map.insert(key, value);
448 }
449 }
450 Value::Array(arr) => {
451 for x in arr {
452 canonicalize_depth(x, depth + 1)?;
453 }
454 }
455 _ => {}
456 }
457 Ok(())
458}
459
460#[doc(hidden)]
474pub fn deserialize_json_value_no_duplicates<'de, D>(deserializer: D) -> Result<Value, D::Error>
475where
476 D: Deserializer<'de>,
477{
478 NoDuplicateValueSeed { depth: 0 }.deserialize(deserializer)
479}
480
481#[doc(hidden)]
488pub fn validate_string_contents(value: &str, context: &str) -> Result<(), String> {
489 if let Some(ch) = value.chars().find(|&ch| is_noncharacter(ch)) {
490 return Err(format!(
491 "{context} contains the forbidden noncharacter U+{:04X}",
492 ch as u32
493 ));
494 }
495 Ok(())
496}
497
/// Reports whether `value` lies within `-(2^53 - 1) ..= 2^53 - 1`.
#[doc(hidden)]
#[must_use]
pub fn is_safe_integer(value: i64) -> bool {
    // Comparing magnitudes covers both ends of the symmetric range at once;
    // `unsigned_abs` is well defined even for `i64::MIN`.
    value.unsigned_abs() <= MAX_SAFE_INTEGER.unsigned_abs()
}

/// Largest magnitude accepted by [`is_safe_integer`]: `2^53 - 1`.
const MAX_SAFE_INTEGER: i64 = 9_007_199_254_740_991;
508
509fn to_canon_bytes_value(value: &Value) -> Result<Vec<u8>, JcsError> {
510 let mut out = Vec::new();
511 emit_value(&mut out, value, 0)?;
512 Ok(out)
513}
514
515fn emit_value(out: &mut Vec<u8>, value: &Value, depth: usize) -> Result<(), JcsError> {
516 if depth > MAX_NESTING_DEPTH {
517 return Err(JcsError::NestingDepthExceeded);
518 }
519 match value {
520 Value::Null => out.extend_from_slice(b"null"),
521 Value::Bool(boolean) => {
522 if *boolean {
523 out.extend_from_slice(b"true");
524 } else {
525 out.extend_from_slice(b"false");
526 }
527 }
528 Value::Number(number) => emit_number(out, number)?,
529 Value::String(string) => emit_string(out, string, "string value")?,
530 Value::Array(array) => {
531 out.push(b'[');
532 for (index, item) in array.iter().enumerate() {
533 if index > 0 {
534 out.push(b',');
535 }
536 emit_value(out, item, depth + 1)?;
537 }
538 out.push(b']');
539 }
540 Value::Object(object) => {
541 out.push(b'{');
542 let mut entries: Vec<_> = object.iter().collect();
543 entries.sort_by(|(left, _), (right, _)| cmp_utf16(left, right));
544
545 for (index, (key, item)) in entries.iter().enumerate() {
546 if index > 0 {
547 out.push(b',');
548 }
549 emit_string(out, key, "object property name")?;
550 out.push(b':');
551 emit_value(out, item, depth + 1)?;
552 }
553 out.push(b'}');
554 }
555 }
556 Ok(())
557}
558
559fn emit_number(out: &mut Vec<u8>, number: &Number) -> Result<(), JcsError> {
560 if let Some(value) = number.as_i64() {
561 let s = value.to_string();
562 ensure_exact_binary64_integer(value.unsigned_abs(), &s)?;
563 out.extend_from_slice(s.as_bytes());
564 return Ok(());
565 }
566
567 if let Some(value) = number.as_u64() {
568 let s = value.to_string();
569 ensure_exact_binary64_integer(value, &s)?;
570 out.extend_from_slice(s.as_bytes());
571 return Ok(());
572 }
573
574 if let Some(value) = number.as_f64() {
575 if !value.is_finite() {
576 return Err(JcsError::InvalidNumber(
577 "encountered a non-finite floating-point number".to_string(),
578 ));
579 }
580
581 let rendered = format_ecmascript_number(value)?;
582 out.extend_from_slice(rendered.as_bytes());
583 return Ok(());
584 }
585
586 Err(JcsError::InvalidNumber(
587 "unsupported JSON number representation".to_string(),
588 ))
589}
590
591fn emit_string(out: &mut Vec<u8>, value: &str, context: &str) -> Result<(), JcsError> {
592 validate_string_contents(value, context).map_err(JcsError::InvalidString)?;
593
594 out.push(b'"');
595 for ch in value.chars() {
596 match ch {
597 '"' => out.extend_from_slice(br#"\""#),
598 '\\' => out.extend_from_slice(br"\\"),
599 '\u{0008}' => out.extend_from_slice(br"\b"),
600 '\u{0009}' => out.extend_from_slice(br"\t"),
601 '\u{000A}' => out.extend_from_slice(br"\n"),
602 '\u{000C}' => out.extend_from_slice(br"\f"),
603 '\u{000D}' => out.extend_from_slice(br"\r"),
604 '\u{0000}'..='\u{001F}' => {
605 let escaped = format!(r"\u{:04x}", ch as u32);
606 out.extend_from_slice(escaped.as_bytes());
607 }
608 _ => {
609 let mut buf = [0u8; 4];
610 let encoded = ch.encode_utf8(&mut buf);
611 out.extend_from_slice(encoded.as_bytes());
612 }
613 }
614 }
615 out.push(b'"');
616
617 Ok(())
618}
619
620fn ensure_exact_binary64_integer(value: u64, original: &str) -> Result<(), JcsError> {
621 if is_exact_binary64_integer(value) {
622 Ok(())
623 } else {
624 Err(JcsError::InvalidNumber(format!(
625 "integer {original} is not exactly representable as an IEEE 754 double; encode it as a string"
626 )))
627 }
628}
629
/// Reports whether `value` has at most 53 significant bits, i.e. whether the
/// span from its highest set bit down to its lowest set bit is no wider than
/// 53 bits. Zero counts as exact.
const fn is_exact_binary64_integer(value: u64) -> bool {
    // Exclusive upper bound for a 53-bit quantity.
    const SIGNIFICAND_LIMIT: u64 = 1 << 53;

    match value {
        0 => true,
        // Dropping the trailing zero bits leaves only the significant bits.
        nonzero => (nonzero >> nonzero.trailing_zeros()) < SIGNIFICAND_LIMIT,
    }
}
637
638fn format_ecmascript_number(value: f64) -> Result<String, JcsError> {
639 if value == 0.0 {
640 return Ok("0".to_string());
641 }
642
643 let mut buffer = zmij::Buffer::new();
644 let shortest = buffer.format_finite(value);
645 let (negative, body) = if let Some(stripped) = shortest.strip_prefix('-') {
646 (true, stripped)
647 } else {
648 (false, shortest)
649 };
650
651 let (digits, exponent) = parse_shortest_decimal(body)?;
652 let rendered = render_ecmascript_number(&digits, exponent)?;
653
654 if negative {
655 Ok(format!("-{rendered}"))
656 } else {
657 Ok(rendered)
658 }
659}
660
661fn parse_shortest_decimal(body: &str) -> Result<(String, i32), JcsError> {
662 if let Some((mantissa, exponent)) = body.split_once('e') {
663 let digits: String = mantissa.chars().filter(|&ch| ch != '.').collect();
664 let exponent = exponent.parse::<i32>().map_err(|error| {
665 JcsError::InvalidNumber(format!(
666 "failed to parse formatter exponent {exponent:?}: {error}"
667 ))
668 })?;
669 return Ok((digits, exponent + 1));
670 }
671
672 if let Some((integer, fractional)) = body.split_once('.') {
673 let fractional = fractional.trim_end_matches('0');
674
675 if integer != "0" {
676 let mut digits = String::with_capacity(integer.len() + fractional.len());
677 digits.push_str(integer);
678 digits.push_str(fractional);
679 let exponent = i32::try_from(integer.len()).map_err(|_| {
680 JcsError::InvalidNumber(
681 "formatter emitted an unexpectedly large integer part".to_string(),
682 )
683 })?;
684 return Ok((digits, exponent));
685 }
686
687 let leading_zeros = fractional.bytes().take_while(|&byte| byte == b'0').count();
688 let exponent = i32::try_from(leading_zeros).map_err(|_| {
689 JcsError::InvalidNumber(
690 "formatter emitted an unexpectedly long leading-zero run".to_string(),
691 )
692 })?;
693 return Ok((fractional[leading_zeros..].to_owned(), -exponent));
694 }
695
696 let exponent = i32::try_from(body.len()).map_err(|_| {
697 JcsError::InvalidNumber("formatter emitted an unexpectedly long integer".to_string())
698 })?;
699 Ok((body.to_owned(), exponent))
700}
701
702fn render_ecmascript_number(digits: &str, exponent: i32) -> Result<String, JcsError> {
703 let digits_len = i32::try_from(digits.len()).map_err(|_| {
704 JcsError::InvalidNumber("formatter emitted an unexpectedly long digit sequence".to_string())
705 })?;
706 if digits_len == 0 {
707 return Err(JcsError::InvalidNumber("empty digit sequence".to_string()));
708 }
709
710 if digits_len <= exponent && exponent <= 21 {
711 let capacity = usize::try_from(exponent).map_err(|_| {
712 JcsError::InvalidNumber(
713 "formatter produced a negative fixed-width exponent".to_string(),
714 )
715 })?;
716 let mut out = String::with_capacity(capacity);
717 out.push_str(digits);
718 for _ in 0..(exponent - digits_len) {
719 out.push('0');
720 }
721 return Ok(out);
722 }
723
724 if 0 < exponent && exponent <= 21 {
725 let split = usize::try_from(exponent).map_err(|_| {
726 JcsError::InvalidNumber("formatter produced a negative split exponent".to_string())
727 })?;
728 let mut out = String::with_capacity(digits.len() + 1);
729 out.push_str(&digits[..split]);
730 out.push('.');
731 out.push_str(&digits[split..]);
732 return Ok(out);
733 }
734
735 if -6 < exponent && exponent <= 0 {
736 let zeros = usize::try_from(-exponent).map_err(|_| {
737 JcsError::InvalidNumber("formatter produced an invalid negative exponent".to_string())
738 })?;
739 let mut out = String::with_capacity(2 + zeros + digits.len());
740 out.push_str("0.");
741 for _ in 0..zeros {
742 out.push('0');
743 }
744 out.push_str(digits);
745 return Ok(out);
746 }
747
748 let exponent = exponent - 1;
749 let (first, rest) = digits.split_at(1);
750 let mut out = String::with_capacity(digits.len() + 6);
751 out.push_str(first);
752 if !rest.is_empty() {
753 out.push('.');
754 out.push_str(rest);
755 }
756 out.push('e');
757 if exponent >= 0 {
758 out.push('+');
759 }
760 out.push_str(&exponent.to_string());
761 Ok(out)
762}
763
/// Orders two strings by their UTF-16 code units.
///
/// This can differ from `str`'s own ordering for characters outside the Basic
/// Multilingual Plane, because their surrogate code units compare lower than
/// some BMP code units.
fn cmp_utf16(left: &str, right: &str) -> Ordering {
    let lhs_units = left.encode_utf16();
    let rhs_units = right.encode_utf16();
    Iterator::cmp(lhs_units, rhs_units)
}
767
/// Reports whether `ch` is a Unicode noncharacter: U+FDD0..=U+FDEF, or a code
/// point whose low 16 bits are `FFFE` or `FFFF`.
fn is_noncharacter(ch: char) -> bool {
    let scalar = u32::from(ch);
    let in_reserved_block = matches!(scalar, 0xFDD0..=0xFDEF);
    let low_sixteen = scalar & 0xFFFF;
    in_reserved_block || low_sixteen >= 0xFFFE
}
772
/// Prefix of the custom serde error message raised when the parser's depth
/// limit is hit. `parse_json_value_no_duplicates` checks whether an error's
/// text starts with this prefix to map it to `JcsError::NestingDepthExceeded`.
const DEPTH_EXCEEDED_SENTINEL: &str = "nesting depth exceeded maximum of ";
777
778fn parse_json_value_no_duplicates(json: &[u8]) -> Result<Value, JcsError> {
779 let mut deserializer = serde_json::Deserializer::from_slice(json);
780 deserializer.disable_recursion_limit();
783 let value = deserialize_json_value_no_duplicates(&mut deserializer).map_err(|e| {
784 if e.to_string().starts_with(DEPTH_EXCEEDED_SENTINEL) {
785 JcsError::NestingDepthExceeded
786 } else {
787 JcsError::Json(e)
788 }
789 })?;
790 deserializer.end()?;
791 Ok(value)
792}
793
794struct NoDuplicateValueSeed {
795 depth: usize,
796}
797
798impl<'de> DeserializeSeed<'de> for NoDuplicateValueSeed {
799 type Value = Value;
800
801 fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
802 where
803 D: Deserializer<'de>,
804 {
805 if self.depth > MAX_NESTING_DEPTH {
806 return Err(D::Error::custom(format!(
807 "{DEPTH_EXCEEDED_SENTINEL}{MAX_NESTING_DEPTH}"
808 )));
809 }
810 deserializer.deserialize_any(NoDuplicateValueVisitor { depth: self.depth })
811 }
812}
813
814struct NoDuplicateValueVisitor {
815 depth: usize,
816}
817
818impl<'de> Visitor<'de> for NoDuplicateValueVisitor {
819 type Value = Value;
820
821 fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
822 formatter.write_str("a valid JSON value")
823 }
824
825 fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E> {
826 Ok(Value::Bool(value))
827 }
828
829 fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E> {
830 Ok(Value::Number(Number::from(value)))
831 }
832
833 fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> {
834 Ok(Value::Number(Number::from(value)))
835 }
836
837 fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
838 where
839 E: de::Error,
840 {
841 Number::from_f64(value)
842 .map(Value::Number)
843 .ok_or_else(|| E::custom("encountered a non-finite floating-point number"))
844 }
845
846 fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
847 where
848 E: de::Error,
849 {
850 validate_string_contents(value, "string value").map_err(E::custom)?;
851 Ok(Value::String(value.to_owned()))
852 }
853
854 fn visit_borrowed_str<E>(self, value: &'de str) -> Result<Self::Value, E>
855 where
856 E: de::Error,
857 {
858 self.visit_str(value)
859 }
860
861 fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
862 where
863 E: de::Error,
864 {
865 validate_string_contents(&value, "string value").map_err(E::custom)?;
866 Ok(Value::String(value))
867 }
868
869 fn visit_none<E>(self) -> Result<Self::Value, E> {
870 Ok(Value::Null)
871 }
872
873 fn visit_unit<E>(self) -> Result<Self::Value, E> {
874 Ok(Value::Null)
875 }
876
877 fn visit_seq<A>(self, mut access: A) -> Result<Self::Value, A::Error>
878 where
879 A: SeqAccess<'de>,
880 {
881 let mut values = Vec::with_capacity(access.size_hint().unwrap_or(0));
882 while let Some(value) = access.next_element_seed(NoDuplicateValueSeed {
883 depth: self.depth + 1,
884 })? {
885 values.push(value);
886 }
887 Ok(Value::Array(values))
888 }
889
890 fn visit_map<A>(self, mut access: A) -> Result<Self::Value, A::Error>
891 where
892 A: MapAccess<'de>,
893 {
894 let Some(first_key) = access.next_key::<String>()? else {
895 return Ok(Value::Object(serde_json::Map::new()));
896 };
897
898 if !first_key.starts_with('$') {
905 validate_string_contents(&first_key, "object property name")
906 .map_err(A::Error::custom)?;
907 }
908
909 let first_value = access.next_value_seed(NoDuplicateValueSeed {
910 depth: self.depth + 1,
911 })?;
912
913 let mut object = serde_json::Map::new();
914 object.insert(first_key.clone(), first_value);
915
916 let mut seen = BTreeSet::new();
917 seen.insert(first_key);
918
919 while let Some(key) = access.next_key::<String>()? {
920 if !key.starts_with('$') {
922 validate_string_contents(&key, "object property name").map_err(A::Error::custom)?;
923 }
924
925 if !seen.insert(key.clone()) {
926 return Err(A::Error::custom(format!("duplicate property name `{key}`")));
927 }
928
929 let value = access.next_value_seed(NoDuplicateValueSeed {
930 depth: self.depth + 1,
931 })?;
932 object.insert(key, value);
933 }
934
935 serde_json::from_value(Value::Object(object)).map_err(A::Error::custom)
939 }
940}
941
942#[cfg(test)]
943#[path = "lib_tests.rs"]
944mod tests;