1use std::cmp::Ordering;
41use std::collections::BTreeSet;
42
43use serde::de::{self, DeserializeSeed, Error as DeError, MapAccess, SeqAccess, Visitor};
44use serde::{Deserializer, Serialize};
45use serde_json::{Number, Value};
46
/// Deepest array/object nesting accepted by the routines in this file.
///
/// The root value is at depth 0 and each array element or object member is one level deeper
/// than its container; a value whose depth is greater than this constant is rejected with
/// [`JcsError::NestingDepthExceeded`].
pub const MAX_NESTING_DEPTH: usize = 128;
49
/// Errors reported by the canonicalization routines in this file.
#[derive(Debug)]
#[non_exhaustive]
pub enum JcsError {
    /// An error produced by `serde_json` while converting or parsing a value.
    Json(serde_json::Error),
    /// A string value or object property name was rejected; the payload is the explanation.
    InvalidString(String),
    /// A number could not be emitted; the payload is the explanation.
    InvalidNumber(String),
    /// A value was nested more deeply than [`MAX_NESTING_DEPTH`].
    NestingDepthExceeded,
}
63
64impl std::fmt::Display for JcsError {
65 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
66 match self {
67 Self::Json(e) => write!(f, "JCS JSON processing failed: {e}"),
68 Self::InvalidString(msg) => write!(f, "JCS string validation failed: {msg}"),
69 Self::InvalidNumber(msg) => write!(f, "JCS number validation failed: {msg}"),
70 Self::NestingDepthExceeded => write!(
71 f,
72 "JCS nesting depth exceeded maximum of {MAX_NESTING_DEPTH}"
73 ),
74 }
75 }
76}
77
78impl std::error::Error for JcsError {
79 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
80 match self {
81 Self::Json(e) => Some(e),
82 Self::InvalidString(_) | Self::InvalidNumber(_) | Self::NestingDepthExceeded => None,
83 }
84 }
85}
86
87impl From<serde_json::Error> for JcsError {
88 fn from(error: serde_json::Error) -> Self {
89 Self::Json(error)
90 }
91}
92
93#[deprecated(
109 since = "0.3.0",
110 note = "use to_canon_bytes_from_slice for untrusted input; see PUBLIC_SURFACE.md"
111)]
112pub fn to_canon_bytes<T: Serialize>(value: &T) -> Result<Vec<u8>, JcsError> {
113 let value = serde_json::to_value(value)?;
114 to_canon_bytes_value(&value)
115}
116
117#[deprecated(
127 since = "0.3.0",
128 note = "use to_canon_string_from_str for untrusted input; see PUBLIC_SURFACE.md"
129)]
130pub fn to_canon_string<T: Serialize>(value: &T) -> Result<String, JcsError> {
131 let value = serde_json::to_value(value)?;
132 let bytes = to_canon_bytes_value(&value)?;
133 String::from_utf8(bytes).map_err(|error| {
134 JcsError::InvalidString(format!(
135 "canonical JSON output was not valid UTF-8: {error}"
136 ))
137 })
138}
139
140pub fn to_canon_bytes_from_slice(json: &[u8]) -> Result<Vec<u8>, JcsError> {
153 let value = parse_json_value_no_duplicates(json)?;
154 to_canon_bytes_value(&value)
155}
156
157pub fn to_canon_string_from_str(json: &str) -> Result<String, JcsError> {
164 let bytes = to_canon_bytes_from_slice(json.as_bytes())?;
165 String::from_utf8(bytes).map_err(|error| {
166 JcsError::InvalidString(format!(
167 "canonical JSON output was not valid UTF-8: {error}"
168 ))
169 })
170}
171
172pub fn canonicalize(v: &mut Value) -> Result<(), JcsError> {
186 canonicalize_depth(v, 0)
187}
188
189fn canonicalize_depth(v: &mut Value, depth: usize) -> Result<(), JcsError> {
190 if depth > MAX_NESTING_DEPTH {
191 return Err(JcsError::NestingDepthExceeded);
192 }
193 match v {
194 Value::Object(map) => {
195 let mut entries: Vec<(String, Value)> = std::mem::take(map).into_iter().collect();
196 entries.sort_by(|(a, _), (b, _)| cmp_utf16(a, b));
197 for (key, mut value) in entries {
198 canonicalize_depth(&mut value, depth + 1)?;
199 map.insert(key, value);
200 }
201 }
202 Value::Array(arr) => {
203 for x in arr {
204 canonicalize_depth(x, depth + 1)?;
205 }
206 }
207 _ => {}
208 }
209 Ok(())
210}
211
212#[doc(hidden)]
226pub fn deserialize_json_value_no_duplicates<'de, D>(deserializer: D) -> Result<Value, D::Error>
227where
228 D: Deserializer<'de>,
229{
230 NoDuplicateValueSeed { depth: 0 }.deserialize(deserializer)
231}
232
233#[doc(hidden)]
240pub fn validate_string_contents(value: &str, context: &str) -> Result<(), String> {
241 if let Some(ch) = value.chars().find(|&ch| is_noncharacter(ch)) {
242 return Err(format!(
243 "{context} contains the forbidden noncharacter U+{:04X}",
244 ch as u32
245 ));
246 }
247 Ok(())
248}
249
/// Returns `true` when `value` lies in `-MAX_SAFE_INTEGER..=MAX_SAFE_INTEGER`.
#[doc(hidden)]
#[must_use]
pub fn is_safe_integer(value: i64) -> bool {
    // Comparing magnitudes covers both bounds at once; `unsigned_abs` is also well defined
    // for `i64::MIN`, whose magnitude is far above the limit.
    value.unsigned_abs() <= MAX_SAFE_INTEGER.unsigned_abs()
}

/// 2^53 - 1, the bound used by [`is_safe_integer`].
const MAX_SAFE_INTEGER: i64 = 9_007_199_254_740_991;
260
261fn to_canon_bytes_value(value: &Value) -> Result<Vec<u8>, JcsError> {
262 let mut out = Vec::new();
263 emit_value(&mut out, value, 0)?;
264 Ok(out)
265}
266
267fn emit_value(out: &mut Vec<u8>, value: &Value, depth: usize) -> Result<(), JcsError> {
268 if depth > MAX_NESTING_DEPTH {
269 return Err(JcsError::NestingDepthExceeded);
270 }
271 match value {
272 Value::Null => out.extend_from_slice(b"null"),
273 Value::Bool(boolean) => {
274 if *boolean {
275 out.extend_from_slice(b"true");
276 } else {
277 out.extend_from_slice(b"false");
278 }
279 }
280 Value::Number(number) => emit_number(out, number)?,
281 Value::String(string) => emit_string(out, string, "string value")?,
282 Value::Array(array) => {
283 out.push(b'[');
284 for (index, item) in array.iter().enumerate() {
285 if index > 0 {
286 out.push(b',');
287 }
288 emit_value(out, item, depth + 1)?;
289 }
290 out.push(b']');
291 }
292 Value::Object(object) => {
293 out.push(b'{');
294 let mut entries: Vec<_> = object.iter().collect();
295 entries.sort_by(|(left, _), (right, _)| cmp_utf16(left, right));
296
297 for (index, (key, item)) in entries.iter().enumerate() {
298 if index > 0 {
299 out.push(b',');
300 }
301 emit_string(out, key, "object property name")?;
302 out.push(b':');
303 emit_value(out, item, depth + 1)?;
304 }
305 out.push(b'}');
306 }
307 }
308 Ok(())
309}
310
311fn emit_number(out: &mut Vec<u8>, number: &Number) -> Result<(), JcsError> {
312 if let Some(value) = number.as_i64() {
313 let s = value.to_string();
314 ensure_exact_binary64_integer(value.unsigned_abs(), &s)?;
315 out.extend_from_slice(s.as_bytes());
316 return Ok(());
317 }
318
319 if let Some(value) = number.as_u64() {
320 let s = value.to_string();
321 ensure_exact_binary64_integer(value, &s)?;
322 out.extend_from_slice(s.as_bytes());
323 return Ok(());
324 }
325
326 if let Some(value) = number.as_f64() {
327 if !value.is_finite() {
328 return Err(JcsError::InvalidNumber(
329 "encountered a non-finite floating-point number".to_string(),
330 ));
331 }
332
333 let rendered = format_ecmascript_number(value)?;
334 out.extend_from_slice(rendered.as_bytes());
335 return Ok(());
336 }
337
338 Err(JcsError::InvalidNumber(
339 "unsupported JSON number representation".to_string(),
340 ))
341}
342
343fn emit_string(out: &mut Vec<u8>, value: &str, context: &str) -> Result<(), JcsError> {
344 validate_string_contents(value, context).map_err(JcsError::InvalidString)?;
345
346 out.push(b'"');
347 for ch in value.chars() {
348 match ch {
349 '"' => out.extend_from_slice(br#"\""#),
350 '\\' => out.extend_from_slice(br"\\"),
351 '\u{0008}' => out.extend_from_slice(br"\b"),
352 '\u{0009}' => out.extend_from_slice(br"\t"),
353 '\u{000A}' => out.extend_from_slice(br"\n"),
354 '\u{000C}' => out.extend_from_slice(br"\f"),
355 '\u{000D}' => out.extend_from_slice(br"\r"),
356 '\u{0000}'..='\u{001F}' => {
357 let escaped = format!(r"\u{:04x}", ch as u32);
358 out.extend_from_slice(escaped.as_bytes());
359 }
360 _ => {
361 let mut buf = [0u8; 4];
362 let encoded = ch.encode_utf8(&mut buf);
363 out.extend_from_slice(encoded.as_bytes());
364 }
365 }
366 }
367 out.push(b'"');
368
369 Ok(())
370}
371
372fn ensure_exact_binary64_integer(value: u64, original: &str) -> Result<(), JcsError> {
373 if is_exact_binary64_integer(value) {
374 Ok(())
375 } else {
376 Err(JcsError::InvalidNumber(format!(
377 "integer {original} is not exactly representable as an IEEE 754 double; encode it as a string"
378 )))
379 }
380}
381
/// Returns `true` when `value` needs at most 53 significant bits, i.e. when the span from
/// its highest set bit to its lowest set bit fits in a binary64 significand.
const fn is_exact_binary64_integer(value: u64) -> bool {
    // Zero is handled first: it has no set bit, so the subtraction below would underflow.
    // For any non-zero value the leading and trailing zero counts sum to at most 63.
    value == 0
        || u64::BITS - value.leading_zeros() - value.trailing_zeros() <= f64::MANTISSA_DIGITS
}
389
390fn format_ecmascript_number(value: f64) -> Result<String, JcsError> {
391 if value == 0.0 {
392 return Ok("0".to_string());
393 }
394
395 let mut buffer = zmij::Buffer::new();
396 let shortest = buffer.format_finite(value);
397 let (negative, body) = if let Some(stripped) = shortest.strip_prefix('-') {
398 (true, stripped)
399 } else {
400 (false, shortest)
401 };
402
403 let (digits, exponent) = parse_shortest_decimal(body)?;
404 let rendered = render_ecmascript_number(&digits, exponent)?;
405
406 if negative {
407 Ok(format!("-{rendered}"))
408 } else {
409 Ok(rendered)
410 }
411}
412
413fn parse_shortest_decimal(body: &str) -> Result<(String, i32), JcsError> {
414 if let Some((mantissa, exponent)) = body.split_once('e') {
415 let digits: String = mantissa.chars().filter(|&ch| ch != '.').collect();
416 let exponent = exponent.parse::<i32>().map_err(|error| {
417 JcsError::InvalidNumber(format!(
418 "failed to parse formatter exponent {exponent:?}: {error}"
419 ))
420 })?;
421 return Ok((digits, exponent + 1));
422 }
423
424 if let Some((integer, fractional)) = body.split_once('.') {
425 let fractional = fractional.trim_end_matches('0');
426
427 if integer != "0" {
428 let mut digits = String::with_capacity(integer.len() + fractional.len());
429 digits.push_str(integer);
430 digits.push_str(fractional);
431 let exponent = i32::try_from(integer.len()).map_err(|_| {
432 JcsError::InvalidNumber(
433 "formatter emitted an unexpectedly large integer part".to_string(),
434 )
435 })?;
436 return Ok((digits, exponent));
437 }
438
439 let leading_zeros = fractional.bytes().take_while(|&byte| byte == b'0').count();
440 let exponent = i32::try_from(leading_zeros).map_err(|_| {
441 JcsError::InvalidNumber(
442 "formatter emitted an unexpectedly long leading-zero run".to_string(),
443 )
444 })?;
445 return Ok((fractional[leading_zeros..].to_owned(), -exponent));
446 }
447
448 let exponent = i32::try_from(body.len()).map_err(|_| {
449 JcsError::InvalidNumber("formatter emitted an unexpectedly long integer".to_string())
450 })?;
451 Ok((body.to_owned(), exponent))
452}
453
454fn render_ecmascript_number(digits: &str, exponent: i32) -> Result<String, JcsError> {
455 let digits_len = i32::try_from(digits.len()).map_err(|_| {
456 JcsError::InvalidNumber("formatter emitted an unexpectedly long digit sequence".to_string())
457 })?;
458 if digits_len == 0 {
459 return Err(JcsError::InvalidNumber("empty digit sequence".to_string()));
460 }
461
462 if digits_len <= exponent && exponent <= 21 {
463 let capacity = usize::try_from(exponent).map_err(|_| {
464 JcsError::InvalidNumber(
465 "formatter produced a negative fixed-width exponent".to_string(),
466 )
467 })?;
468 let mut out = String::with_capacity(capacity);
469 out.push_str(digits);
470 for _ in 0..(exponent - digits_len) {
471 out.push('0');
472 }
473 return Ok(out);
474 }
475
476 if 0 < exponent && exponent <= 21 {
477 let split = usize::try_from(exponent).map_err(|_| {
478 JcsError::InvalidNumber("formatter produced a negative split exponent".to_string())
479 })?;
480 let mut out = String::with_capacity(digits.len() + 1);
481 out.push_str(&digits[..split]);
482 out.push('.');
483 out.push_str(&digits[split..]);
484 return Ok(out);
485 }
486
487 if -6 < exponent && exponent <= 0 {
488 let zeros = usize::try_from(-exponent).map_err(|_| {
489 JcsError::InvalidNumber("formatter produced an invalid negative exponent".to_string())
490 })?;
491 let mut out = String::with_capacity(2 + zeros + digits.len());
492 out.push_str("0.");
493 for _ in 0..zeros {
494 out.push('0');
495 }
496 out.push_str(digits);
497 return Ok(out);
498 }
499
500 let exponent = exponent - 1;
501 let (first, rest) = digits.split_at(1);
502 let mut out = String::with_capacity(digits.len() + 6);
503 out.push_str(first);
504 if !rest.is_empty() {
505 out.push('.');
506 out.push_str(rest);
507 }
508 out.push('e');
509 if exponent >= 0 {
510 out.push('+');
511 }
512 out.push_str(&exponent.to_string());
513 Ok(out)
514}
515
/// Compares two strings lexicographically by their UTF-16 code units.
///
/// This differs from the byte-wise `str` ordering for characters outside the Basic
/// Multilingual Plane, whose surrogate code units sort below U+E000..=U+FFFF.
fn cmp_utf16(left: &str, right: &str) -> Ordering {
    let left_units = left.encode_utf16();
    let right_units = right.encode_utf16();
    Iterator::cmp(left_units, right_units)
}
519
/// Returns `true` for U+FDD0..=U+FDEF and for any code point whose low 16 bits are `FFFE`
/// or `FFFF` (the last two code points of every plane).
fn is_noncharacter(ch: char) -> bool {
    match u32::from(ch) {
        0xFDD0..=0xFDEF => true,
        code => (code & 0xFFFF) >= 0xFFFE,
    }
}
524
/// Prefix of the custom deserializer error raised when the nesting limit is exceeded.
///
/// `NoDuplicateValueSeed` builds its error message from this prefix followed by
/// [`MAX_NESTING_DEPTH`], and `parse_json_value_no_duplicates` recognises that error by
/// checking whether the rendered message starts with this text.
const DEPTH_EXCEEDED_SENTINEL: &str = "nesting depth exceeded maximum of ";
529
530fn parse_json_value_no_duplicates(json: &[u8]) -> Result<Value, JcsError> {
531 let mut deserializer = serde_json::Deserializer::from_slice(json);
532 deserializer.disable_recursion_limit();
535 let value = deserialize_json_value_no_duplicates(&mut deserializer).map_err(|e| {
536 if e.to_string().starts_with(DEPTH_EXCEEDED_SENTINEL) {
537 JcsError::NestingDepthExceeded
538 } else {
539 JcsError::Json(e)
540 }
541 })?;
542 deserializer.end()?;
543 Ok(value)
544}
545
/// Deserialization seed that produces a `Value` while tracking how deeply the value being
/// read is nested.
struct NoDuplicateValueSeed {
    /// Nesting level of the value this seed will deserialize (the root is 0).
    depth: usize,
}
549
550impl<'de> DeserializeSeed<'de> for NoDuplicateValueSeed {
551 type Value = Value;
552
553 fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
554 where
555 D: Deserializer<'de>,
556 {
557 if self.depth > MAX_NESTING_DEPTH {
558 return Err(D::Error::custom(format!(
559 "{DEPTH_EXCEEDED_SENTINEL}{MAX_NESTING_DEPTH}"
560 )));
561 }
562 deserializer.deserialize_any(NoDuplicateValueVisitor { depth: self.depth })
563 }
564}
565
/// Visitor that builds a `Value`, validating strings and rejecting duplicate property
/// names within an object.
struct NoDuplicateValueVisitor {
    /// Nesting level of the value being visited; children are deserialized at `depth + 1`.
    depth: usize,
}
569
570impl<'de> Visitor<'de> for NoDuplicateValueVisitor {
571 type Value = Value;
572
573 fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
574 formatter.write_str("a valid JSON value")
575 }
576
577 fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E> {
578 Ok(Value::Bool(value))
579 }
580
581 fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E> {
582 Ok(Value::Number(Number::from(value)))
583 }
584
585 fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> {
586 Ok(Value::Number(Number::from(value)))
587 }
588
589 fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
590 where
591 E: de::Error,
592 {
593 Number::from_f64(value)
594 .map(Value::Number)
595 .ok_or_else(|| E::custom("encountered a non-finite floating-point number"))
596 }
597
598 fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
599 where
600 E: de::Error,
601 {
602 validate_string_contents(value, "string value").map_err(E::custom)?;
603 Ok(Value::String(value.to_owned()))
604 }
605
606 fn visit_borrowed_str<E>(self, value: &'de str) -> Result<Self::Value, E>
607 where
608 E: de::Error,
609 {
610 self.visit_str(value)
611 }
612
613 fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
614 where
615 E: de::Error,
616 {
617 validate_string_contents(&value, "string value").map_err(E::custom)?;
618 Ok(Value::String(value))
619 }
620
621 fn visit_none<E>(self) -> Result<Self::Value, E> {
622 Ok(Value::Null)
623 }
624
625 fn visit_unit<E>(self) -> Result<Self::Value, E> {
626 Ok(Value::Null)
627 }
628
629 fn visit_seq<A>(self, mut access: A) -> Result<Self::Value, A::Error>
630 where
631 A: SeqAccess<'de>,
632 {
633 let mut values = Vec::with_capacity(access.size_hint().unwrap_or(0));
634 while let Some(value) = access.next_element_seed(NoDuplicateValueSeed {
635 depth: self.depth + 1,
636 })? {
637 values.push(value);
638 }
639 Ok(Value::Array(values))
640 }
641
642 fn visit_map<A>(self, mut access: A) -> Result<Self::Value, A::Error>
643 where
644 A: MapAccess<'de>,
645 {
646 let Some(first_key) = access.next_key::<String>()? else {
647 return Ok(Value::Object(serde_json::Map::new()));
648 };
649
650 if !first_key.starts_with('$') {
657 validate_string_contents(&first_key, "object property name")
658 .map_err(A::Error::custom)?;
659 }
660
661 let first_value = access.next_value_seed(NoDuplicateValueSeed {
662 depth: self.depth + 1,
663 })?;
664
665 let mut object = serde_json::Map::new();
666 object.insert(first_key.clone(), first_value);
667
668 let mut seen = BTreeSet::new();
669 seen.insert(first_key);
670
671 while let Some(key) = access.next_key::<String>()? {
672 if !key.starts_with('$') {
674 validate_string_contents(&key, "object property name").map_err(A::Error::custom)?;
675 }
676
677 if !seen.insert(key.clone()) {
678 return Err(A::Error::custom(format!("duplicate property name `{key}`")));
679 }
680
681 let value = access.next_value_seed(NoDuplicateValueSeed {
682 depth: self.depth + 1,
683 })?;
684 object.insert(key, value);
685 }
686
687 serde_json::from_value(Value::Object(object)).map_err(A::Error::custom)
691 }
692}
693
// Unit tests are loaded from the file `lib_tests.rs` and compiled only for test builds.
#[cfg(test)]
#[path = "lib_tests.rs"]
mod tests;