1use std::cmp::Ordering;
41use std::collections::BTreeSet;
42
43use serde::de::{self, DeserializeSeed, Error as DeError, MapAccess, SeqAccess, Visitor};
44use serde::{Deserializer, Serialize};
45use serde_json::{Number, Value};
46
/// Deepest container nesting accepted by the canonicalization routines.
///
/// The depth checks in this file reject a value only when its depth is
/// strictly greater than this limit; the root value is at depth `0`.
pub const MAX_NESTING_DEPTH: usize = 128;
54
55#[derive(Debug)]
57#[non_exhaustive]
58pub enum JcsError {
59 Json(serde_json::Error),
61 InvalidString(String),
63 InvalidNumber(String),
65 NestingDepthExceeded,
67}
68
69impl std::fmt::Display for JcsError {
70 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71 match self {
72 Self::Json(e) => write!(f, "JCS JSON processing failed: {e}"),
73 Self::InvalidString(msg) => write!(f, "JCS string validation failed: {msg}"),
74 Self::InvalidNumber(msg) => write!(f, "JCS number validation failed: {msg}"),
75 Self::NestingDepthExceeded => write!(
76 f,
77 "JCS nesting depth exceeded maximum of {MAX_NESTING_DEPTH}"
78 ),
79 }
80 }
81}
82
83impl std::error::Error for JcsError {
84 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
85 match self {
86 Self::Json(e) => Some(e),
87 Self::InvalidString(_) | Self::InvalidNumber(_) | Self::NestingDepthExceeded => None,
88 }
89 }
90}
91
92impl From<serde_json::Error> for JcsError {
93 fn from(error: serde_json::Error) -> Self {
94 Self::Json(error)
95 }
96}
97
98#[deprecated(
114 since = "0.3.0",
115 note = "use to_canon_bytes_from_slice for untrusted input; see PUBLIC_SURFACE.md"
116)]
117pub fn to_canon_bytes<T: Serialize>(value: &T) -> Result<Vec<u8>, JcsError> {
118 let value = serde_json::to_value(value)?;
119 to_canon_bytes_value(&value)
120}
121
122#[deprecated(
132 since = "0.3.0",
133 note = "use to_canon_string_from_str for untrusted input; see PUBLIC_SURFACE.md"
134)]
135pub fn to_canon_string<T: Serialize>(value: &T) -> Result<String, JcsError> {
136 let value = serde_json::to_value(value)?;
137 let bytes = to_canon_bytes_value(&value)?;
138 String::from_utf8(bytes).map_err(|error| {
139 JcsError::InvalidString(format!(
140 "canonical JSON output was not valid UTF-8: {error}"
141 ))
142 })
143}
144
145pub fn to_canon_bytes_from_slice(json: &[u8]) -> Result<Vec<u8>, JcsError> {
158 let value = parse_json_value_no_duplicates(json)?;
159 to_canon_bytes_value(&value)
160}
161
162pub fn to_canon_string_from_str(json: &str) -> Result<String, JcsError> {
169 let bytes = to_canon_bytes_from_slice(json.as_bytes())?;
170 String::from_utf8(bytes).map_err(|error| {
171 JcsError::InvalidString(format!(
172 "canonical JSON output was not valid UTF-8: {error}"
173 ))
174 })
175}
176
177pub fn canonicalize(v: &mut Value) -> Result<(), JcsError> {
191 canonicalize_depth(v, 0)
192}
193
194fn canonicalize_depth(v: &mut Value, depth: usize) -> Result<(), JcsError> {
195 if depth > MAX_NESTING_DEPTH {
196 return Err(JcsError::NestingDepthExceeded);
197 }
198 match v {
199 Value::Object(map) => {
200 let mut entries: Vec<(String, Value)> =
201 std::mem::take(map).into_iter().collect();
202 entries.sort_by(|(a, _), (b, _)| cmp_utf16(a, b));
203 for (key, mut value) in entries {
204 canonicalize_depth(&mut value, depth + 1)?;
205 map.insert(key, value);
206 }
207 }
208 Value::Array(arr) => {
209 for x in arr {
210 canonicalize_depth(x, depth + 1)?;
211 }
212 }
213 _ => {}
214 }
215 Ok(())
216}
217
218#[doc(hidden)]
233pub fn deserialize_json_value_no_duplicates<'de, D>(deserializer: D) -> Result<Value, D::Error>
234where
235 D: Deserializer<'de>,
236{
237 NoDuplicateValueSeed { depth: 0 }.deserialize(deserializer)
238}
239
240#[doc(hidden)]
247pub fn validate_string_contents(value: &str, context: &str) -> Result<(), String> {
248 if let Some(ch) = value.chars().find(|&ch| is_noncharacter(ch)) {
249 return Err(format!(
250 "{context} contains the forbidden noncharacter U+{:04X}",
251 ch as u32
252 ));
253 }
254 Ok(())
255}
256
/// Reports whether `value` lies within
/// `-MAX_SAFE_INTEGER..=MAX_SAFE_INTEGER`.
#[doc(hidden)]
#[must_use]
pub fn is_safe_integer(value: i64) -> bool {
    // Comparing magnitudes covers both ends of the symmetric range at once;
    // `unsigned_abs` is well-defined even for `i64::MIN`.
    value.unsigned_abs() <= MAX_SAFE_INTEGER.unsigned_abs()
}

/// Bound of the symmetric range accepted by `is_safe_integer`
/// (`9_007_199_254_740_991`, i.e. 2^53 - 1).
const MAX_SAFE_INTEGER: i64 = 9_007_199_254_740_991;
267
268fn to_canon_bytes_value(value: &Value) -> Result<Vec<u8>, JcsError> {
269 let mut out = Vec::new();
270 emit_value(&mut out, value, 0)?;
271 Ok(out)
272}
273
274fn emit_value(out: &mut Vec<u8>, value: &Value, depth: usize) -> Result<(), JcsError> {
275 if depth > MAX_NESTING_DEPTH {
276 return Err(JcsError::NestingDepthExceeded);
277 }
278 match value {
279 Value::Null => out.extend_from_slice(b"null"),
280 Value::Bool(boolean) => {
281 if *boolean {
282 out.extend_from_slice(b"true");
283 } else {
284 out.extend_from_slice(b"false");
285 }
286 }
287 Value::Number(number) => emit_number(out, number)?,
288 Value::String(string) => emit_string(out, string, "string value")?,
289 Value::Array(array) => {
290 out.push(b'[');
291 for (index, item) in array.iter().enumerate() {
292 if index > 0 {
293 out.push(b',');
294 }
295 emit_value(out, item, depth + 1)?;
296 }
297 out.push(b']');
298 }
299 Value::Object(object) => {
300 out.push(b'{');
301 let mut entries: Vec<_> = object.iter().collect();
302 entries.sort_by(|(left, _), (right, _)| cmp_utf16(left, right));
303
304 for (index, (key, item)) in entries.iter().enumerate() {
305 if index > 0 {
306 out.push(b',');
307 }
308 emit_string(out, key, "object property name")?;
309 out.push(b':');
310 emit_value(out, item, depth + 1)?;
311 }
312 out.push(b'}');
313 }
314 }
315 Ok(())
316}
317
318fn emit_number(out: &mut Vec<u8>, number: &Number) -> Result<(), JcsError> {
319 if let Some(value) = number.as_i64() {
320 let s = value.to_string();
321 ensure_exact_binary64_integer(value.unsigned_abs(), &s)?;
322 out.extend_from_slice(s.as_bytes());
323 return Ok(());
324 }
325
326 if let Some(value) = number.as_u64() {
327 let s = value.to_string();
328 ensure_exact_binary64_integer(value, &s)?;
329 out.extend_from_slice(s.as_bytes());
330 return Ok(());
331 }
332
333 if let Some(value) = number.as_f64() {
334 if !value.is_finite() {
335 return Err(JcsError::InvalidNumber(
336 "encountered a non-finite floating-point number".to_string(),
337 ));
338 }
339
340 let rendered = format_ecmascript_number(value)?;
341 out.extend_from_slice(rendered.as_bytes());
342 return Ok(());
343 }
344
345 Err(JcsError::InvalidNumber(
346 "unsupported JSON number representation".to_string(),
347 ))
348}
349
350fn emit_string(out: &mut Vec<u8>, value: &str, context: &str) -> Result<(), JcsError> {
351 validate_string_contents(value, context).map_err(JcsError::InvalidString)?;
352
353 out.push(b'"');
354 for ch in value.chars() {
355 match ch {
356 '"' => out.extend_from_slice(br#"\""#),
357 '\\' => out.extend_from_slice(br"\\"),
358 '\u{0008}' => out.extend_from_slice(br"\b"),
359 '\u{0009}' => out.extend_from_slice(br"\t"),
360 '\u{000A}' => out.extend_from_slice(br"\n"),
361 '\u{000C}' => out.extend_from_slice(br"\f"),
362 '\u{000D}' => out.extend_from_slice(br"\r"),
363 '\u{0000}'..='\u{001F}' => {
364 let escaped = format!(r"\u{:04x}", ch as u32);
365 out.extend_from_slice(escaped.as_bytes());
366 }
367 _ => {
368 let mut buf = [0u8; 4];
369 let encoded = ch.encode_utf8(&mut buf);
370 out.extend_from_slice(encoded.as_bytes());
371 }
372 }
373 }
374 out.push(b'"');
375
376 Ok(())
377}
378
379fn ensure_exact_binary64_integer(value: u64, original: &str) -> Result<(), JcsError> {
380 if is_exact_binary64_integer(value) {
381 Ok(())
382 } else {
383 Err(JcsError::InvalidNumber(format!(
384 "integer {original} is not exactly representable as an IEEE 754 double; encode it as a string"
385 )))
386 }
387}
388
/// Reports whether `value` fits in a 53-bit significand.
///
/// That holds for zero, and for any non-zero value whose span from highest
/// to lowest set bit is at most 53 bits wide.
const fn is_exact_binary64_integer(value: u64) -> bool {
    // The zero test short-circuits first: for 0 both counts are 64 and the
    // subtraction below would underflow. For non-zero values the trailing
    // zero count is always smaller than the bit length.
    value == 0 || u64::BITS - value.leading_zeros() - value.trailing_zeros() <= 53
}
396
397fn format_ecmascript_number(value: f64) -> Result<String, JcsError> {
398 if value == 0.0 {
399 return Ok("0".to_string());
400 }
401
402 let mut buffer = zmij::Buffer::new();
403 let shortest = buffer.format_finite(value);
404 let (negative, body) = if let Some(stripped) = shortest.strip_prefix('-') {
405 (true, stripped)
406 } else {
407 (false, shortest)
408 };
409
410 let (digits, exponent) = parse_shortest_decimal(body)?;
411 let rendered = render_ecmascript_number(&digits, exponent)?;
412
413 if negative {
414 Ok(format!("-{rendered}"))
415 } else {
416 Ok(rendered)
417 }
418}
419
420fn parse_shortest_decimal(body: &str) -> Result<(String, i32), JcsError> {
421 if let Some((mantissa, exponent)) = body.split_once('e') {
422 let digits: String = mantissa.chars().filter(|&ch| ch != '.').collect();
423 let exponent = exponent.parse::<i32>().map_err(|error| {
424 JcsError::InvalidNumber(format!(
425 "failed to parse formatter exponent {exponent:?}: {error}"
426 ))
427 })?;
428 return Ok((digits, exponent + 1));
429 }
430
431 if let Some((integer, fractional)) = body.split_once('.') {
432 let fractional = fractional.trim_end_matches('0');
433
434 if integer != "0" {
435 let mut digits = String::with_capacity(integer.len() + fractional.len());
436 digits.push_str(integer);
437 digits.push_str(fractional);
438 let exponent = i32::try_from(integer.len()).map_err(|_| {
439 JcsError::InvalidNumber(
440 "formatter emitted an unexpectedly large integer part".to_string(),
441 )
442 })?;
443 return Ok((digits, exponent));
444 }
445
446 let leading_zeros = fractional.bytes().take_while(|&byte| byte == b'0').count();
447 let exponent = i32::try_from(leading_zeros).map_err(|_| {
448 JcsError::InvalidNumber(
449 "formatter emitted an unexpectedly long leading-zero run".to_string(),
450 )
451 })?;
452 return Ok((fractional[leading_zeros..].to_owned(), -exponent));
453 }
454
455 let exponent = i32::try_from(body.len()).map_err(|_| {
456 JcsError::InvalidNumber("formatter emitted an unexpectedly long integer".to_string())
457 })?;
458 Ok((body.to_owned(), exponent))
459}
460
461fn render_ecmascript_number(digits: &str, exponent: i32) -> Result<String, JcsError> {
462 let digits_len = i32::try_from(digits.len()).map_err(|_| {
463 JcsError::InvalidNumber("formatter emitted an unexpectedly long digit sequence".to_string())
464 })?;
465 if digits_len == 0 {
466 return Err(JcsError::InvalidNumber("empty digit sequence".to_string()));
467 }
468
469 if digits_len <= exponent && exponent <= 21 {
470 let capacity = usize::try_from(exponent).map_err(|_| {
471 JcsError::InvalidNumber(
472 "formatter produced a negative fixed-width exponent".to_string(),
473 )
474 })?;
475 let mut out = String::with_capacity(capacity);
476 out.push_str(digits);
477 for _ in 0..(exponent - digits_len) {
478 out.push('0');
479 }
480 return Ok(out);
481 }
482
483 if 0 < exponent && exponent <= 21 {
484 let split = usize::try_from(exponent).map_err(|_| {
485 JcsError::InvalidNumber("formatter produced a negative split exponent".to_string())
486 })?;
487 let mut out = String::with_capacity(digits.len() + 1);
488 out.push_str(&digits[..split]);
489 out.push('.');
490 out.push_str(&digits[split..]);
491 return Ok(out);
492 }
493
494 if -6 < exponent && exponent <= 0 {
495 let zeros = usize::try_from(-exponent).map_err(|_| {
496 JcsError::InvalidNumber("formatter produced an invalid negative exponent".to_string())
497 })?;
498 let mut out = String::with_capacity(2 + zeros + digits.len());
499 out.push_str("0.");
500 for _ in 0..zeros {
501 out.push('0');
502 }
503 out.push_str(digits);
504 return Ok(out);
505 }
506
507 let exponent = exponent - 1;
508 let (first, rest) = digits.split_at(1);
509 let mut out = String::with_capacity(digits.len() + 6);
510 out.push_str(first);
511 if !rest.is_empty() {
512 out.push('.');
513 out.push_str(rest);
514 }
515 out.push('e');
516 if exponent >= 0 {
517 out.push('+');
518 }
519 out.push_str(&exponent.to_string());
520 Ok(out)
521}
522
/// Orders two strings lexicographically by their UTF-16 code units.
///
/// This differs from code-point order for characters outside the Basic
/// Multilingual Plane, whose surrogate units sort below U+E000..U+FFFF.
fn cmp_utf16(left: &str, right: &str) -> Ordering {
    let mut lhs = left.encode_utf16();
    let mut rhs = right.encode_utf16();
    loop {
        return match (lhs.next(), rhs.next()) {
            (None, None) => Ordering::Equal,
            // A strict prefix sorts before the longer string.
            (None, Some(_)) => Ordering::Less,
            (Some(_), None) => Ordering::Greater,
            (Some(a), Some(b)) => match a.cmp(&b) {
                Ordering::Equal => continue,
                unequal => unequal,
            },
        };
    }
}
526
/// Reports whether `ch` is in U+FDD0..=U+FDEF or is one of the two final
/// code points (`..FFFE`, `..FFFF`) of any 16-bit block.
fn is_noncharacter(ch: char) -> bool {
    let code = u32::from(ch);
    // Low 16 bits of 0xFFFE or 0xFFFF, whatever the upper bits are.
    let ends_block = code & 0xFFFF >= 0xFFFE;
    ends_block || matches!(code, 0xFDD0..=0xFDEF)
}
531
532const DEPTH_EXCEEDED_SENTINEL: &str = "nesting depth exceeded maximum of ";
536
537fn parse_json_value_no_duplicates(json: &[u8]) -> Result<Value, JcsError> {
538 let mut deserializer = serde_json::Deserializer::from_slice(json);
539 deserializer.disable_recursion_limit();
542 let value = deserialize_json_value_no_duplicates(&mut deserializer).map_err(|e| {
543 if e.to_string().starts_with(DEPTH_EXCEEDED_SENTINEL) {
544 JcsError::NestingDepthExceeded
545 } else {
546 JcsError::Json(e)
547 }
548 })?;
549 deserializer.end()?;
550 Ok(value)
551}
552
553struct NoDuplicateValueSeed {
554 depth: usize,
555}
556
557impl<'de> DeserializeSeed<'de> for NoDuplicateValueSeed {
558 type Value = Value;
559
560 fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
561 where
562 D: Deserializer<'de>,
563 {
564 if self.depth > MAX_NESTING_DEPTH {
565 return Err(D::Error::custom(format!(
566 "{DEPTH_EXCEEDED_SENTINEL}{MAX_NESTING_DEPTH}"
567 )));
568 }
569 deserializer.deserialize_any(NoDuplicateValueVisitor { depth: self.depth })
570 }
571}
572
573struct NoDuplicateValueVisitor {
574 depth: usize,
575}
576
577impl<'de> Visitor<'de> for NoDuplicateValueVisitor {
578 type Value = Value;
579
580 fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
581 formatter.write_str("a valid JSON value")
582 }
583
584 fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E> {
585 Ok(Value::Bool(value))
586 }
587
588 fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E> {
589 Ok(Value::Number(Number::from(value)))
590 }
591
592 fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> {
593 Ok(Value::Number(Number::from(value)))
594 }
595
596 fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
597 where
598 E: de::Error,
599 {
600 Number::from_f64(value)
601 .map(Value::Number)
602 .ok_or_else(|| E::custom("encountered a non-finite floating-point number"))
603 }
604
605 fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
606 where
607 E: de::Error,
608 {
609 validate_string_contents(value, "string value").map_err(E::custom)?;
610 Ok(Value::String(value.to_owned()))
611 }
612
613 fn visit_borrowed_str<E>(self, value: &'de str) -> Result<Self::Value, E>
614 where
615 E: de::Error,
616 {
617 self.visit_str(value)
618 }
619
620 fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
621 where
622 E: de::Error,
623 {
624 validate_string_contents(&value, "string value").map_err(E::custom)?;
625 Ok(Value::String(value))
626 }
627
628 fn visit_none<E>(self) -> Result<Self::Value, E> {
629 Ok(Value::Null)
630 }
631
632 fn visit_unit<E>(self) -> Result<Self::Value, E> {
633 Ok(Value::Null)
634 }
635
636 fn visit_seq<A>(self, mut access: A) -> Result<Self::Value, A::Error>
637 where
638 A: SeqAccess<'de>,
639 {
640 let mut values = Vec::with_capacity(access.size_hint().unwrap_or(0));
641 while let Some(value) = access.next_element_seed(NoDuplicateValueSeed {
642 depth: self.depth + 1,
643 })? {
644 values.push(value);
645 }
646 Ok(Value::Array(values))
647 }
648
649 fn visit_map<A>(self, mut access: A) -> Result<Self::Value, A::Error>
650 where
651 A: MapAccess<'de>,
652 {
653 let Some(first_key) = access.next_key::<String>()? else {
654 return Ok(Value::Object(serde_json::Map::new()));
655 };
656
657 if !first_key.starts_with('$') {
660 validate_string_contents(&first_key, "object property name")
661 .map_err(A::Error::custom)?;
662 }
663
664 let first_value = access.next_value_seed(NoDuplicateValueSeed {
665 depth: self.depth + 1,
666 })?;
667
668 let mut object = serde_json::Map::new();
669 object.insert(first_key.clone(), first_value);
670
671 let mut seen = BTreeSet::new();
672 seen.insert(first_key);
673
674 while let Some(key) = access.next_key::<String>()? {
675 if !key.starts_with('$') {
679 validate_string_contents(&key, "object property name").map_err(A::Error::custom)?;
680 }
681
682 if !seen.insert(key.clone()) {
683 return Err(A::Error::custom(format!("duplicate property name `{key}`")));
684 }
685
686 let value = access.next_value_seed(NoDuplicateValueSeed {
687 depth: self.depth + 1,
688 })?;
689 object.insert(key, value);
690 }
691
692 serde_json::from_value(Value::Object(object)).map_err(A::Error::custom)
696 }
697}
698
699#[cfg(test)]
700#[path = "lib_tests.rs"]
701mod tests;