1use std::cmp::Ordering;
41use std::collections::BTreeSet;
42
43use serde::de::{self, DeserializeSeed, Error as DeError, MapAccess, SeqAccess, Visitor};
44use serde::{Deserializer, Serialize};
45use serde_json::{Number, Value};
46
/// Deepest nesting level accepted by parsing, in-place canonicalization, and emission.
///
/// The root value is at depth 0 and each array/object adds one level; any value whose depth is
/// strictly greater than this limit is rejected with [`JcsError::NestingDepthExceeded`].
pub const MAX_NESTING_DEPTH: usize = 128;
54
/// Errors reported by the canonicalization routines in this module.
#[derive(Debug)]
pub enum JcsError {
    /// An error produced by `serde_json` (for example while parsing or converting to a `Value`).
    Json(serde_json::Error),
    /// A string value or object property name was rejected; the payload is the explanation.
    InvalidString(String),
    /// A number could not be emitted canonically; the payload is the explanation.
    InvalidNumber(String),
    /// The input nests arrays/objects deeper than [`MAX_NESTING_DEPTH`].
    NestingDepthExceeded,
}
67
68impl std::fmt::Display for JcsError {
69 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
70 match self {
71 Self::Json(e) => write!(f, "JCS JSON processing failed: {e}"),
72 Self::InvalidString(msg) => write!(f, "JCS string validation failed: {msg}"),
73 Self::InvalidNumber(msg) => write!(f, "JCS number validation failed: {msg}"),
74 Self::NestingDepthExceeded => write!(
75 f,
76 "JCS nesting depth exceeded maximum of {MAX_NESTING_DEPTH}"
77 ),
78 }
79 }
80}
81
82impl std::error::Error for JcsError {
83 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
84 match self {
85 Self::Json(e) => Some(e),
86 Self::InvalidString(_) | Self::InvalidNumber(_) | Self::NestingDepthExceeded => None,
87 }
88 }
89}
90
91impl From<serde_json::Error> for JcsError {
92 fn from(error: serde_json::Error) -> Self {
93 Self::Json(error)
94 }
95}
96
97#[deprecated(
113 since = "0.3.0",
114 note = "use to_canon_bytes_from_slice for untrusted input; see PUBLIC_SURFACE.md"
115)]
116pub fn to_canon_bytes<T: Serialize>(value: &T) -> Result<Vec<u8>, JcsError> {
117 let value = serde_json::to_value(value)?;
118 to_canon_bytes_value(&value)
119}
120
121#[deprecated(
127 since = "0.3.0",
128 note = "use to_canon_string_from_str for untrusted input; see PUBLIC_SURFACE.md"
129)]
130pub fn to_canon_string<T: Serialize>(value: &T) -> Result<String, JcsError> {
131 let value = serde_json::to_value(value)?;
132 let bytes = to_canon_bytes_value(&value)?;
133 String::from_utf8(bytes).map_err(|error| {
134 JcsError::InvalidString(format!(
135 "canonical JSON output was not valid UTF-8: {error}"
136 ))
137 })
138}
139
140pub fn to_canon_bytes_from_slice(json: &[u8]) -> Result<Vec<u8>, JcsError> {
153 let value = parse_json_value_no_duplicates(json)?;
154 to_canon_bytes_value(&value)
155}
156
157pub fn to_canon_string_from_str(json: &str) -> Result<String, JcsError> {
164 let bytes = to_canon_bytes_from_slice(json.as_bytes())?;
165 String::from_utf8(bytes).map_err(|error| {
166 JcsError::InvalidString(format!(
167 "canonical JSON output was not valid UTF-8: {error}"
168 ))
169 })
170}
171
172pub fn canonicalize(v: &mut Value) -> Result<(), JcsError> {
186 canonicalize_depth(v, 0)
187}
188
189fn canonicalize_depth(v: &mut Value, depth: usize) -> Result<(), JcsError> {
190 if depth > MAX_NESTING_DEPTH {
191 return Err(JcsError::NestingDepthExceeded);
192 }
193 match v {
194 Value::Object(map) => {
195 let keys: Vec<String> = map.keys().cloned().collect();
196 let mut entries: Vec<(String, Value)> = keys
197 .into_iter()
198 .filter_map(|k| map.remove(&k).map(|v| (k, v)))
199 .collect();
200 entries.sort_by(|(a, _), (b, _)| cmp_utf16(a, b));
201 for (key, mut value) in entries {
202 canonicalize_depth(&mut value, depth + 1)?;
203 map.insert(key, value);
204 }
205 }
206 Value::Array(arr) => {
207 for x in arr {
208 canonicalize_depth(x, depth + 1)?;
209 }
210 }
211 _ => {}
212 }
213 Ok(())
214}
215
216#[doc(hidden)]
231pub fn deserialize_json_value_no_duplicates<'de, D>(deserializer: D) -> Result<Value, D::Error>
232where
233 D: Deserializer<'de>,
234{
235 NoDuplicateValueSeed { depth: 0 }.deserialize(deserializer)
236}
237
238#[doc(hidden)]
245pub fn validate_string_contents(value: &str, context: &str) -> Result<(), String> {
246 if let Some(ch) = value.chars().find(|&ch| is_noncharacter(ch)) {
247 return Err(format!(
248 "{context} contains the forbidden noncharacter U+{:04X}",
249 ch as u32
250 ));
251 }
252 Ok(())
253}
254
/// Reports whether `value` lies within `-MAX_SAFE_INTEGER..=MAX_SAFE_INTEGER`.
#[doc(hidden)]
#[must_use]
pub fn is_safe_integer(value: i64) -> bool {
    // The range is symmetric around zero, so comparing magnitudes is sufficient;
    // `unsigned_abs` also handles `i64::MIN` without overflow.
    value.unsigned_abs() <= MAX_SAFE_INTEGER.unsigned_abs()
}

/// Largest magnitude accepted by [`is_safe_integer`] (2^53 - 1).
const MAX_SAFE_INTEGER: i64 = 9_007_199_254_740_991;
265
266fn to_canon_bytes_value(value: &Value) -> Result<Vec<u8>, JcsError> {
267 let mut out = Vec::new();
268 emit_value(&mut out, value, 0)?;
269 Ok(out)
270}
271
272fn emit_value(out: &mut Vec<u8>, value: &Value, depth: usize) -> Result<(), JcsError> {
273 if depth > MAX_NESTING_DEPTH {
274 return Err(JcsError::NestingDepthExceeded);
275 }
276 match value {
277 Value::Null => out.extend_from_slice(b"null"),
278 Value::Bool(boolean) => {
279 if *boolean {
280 out.extend_from_slice(b"true");
281 } else {
282 out.extend_from_slice(b"false");
283 }
284 }
285 Value::Number(number) => emit_number(out, number)?,
286 Value::String(string) => emit_string(out, string, "string value")?,
287 Value::Array(array) => {
288 out.push(b'[');
289 for (index, item) in array.iter().enumerate() {
290 if index > 0 {
291 out.push(b',');
292 }
293 emit_value(out, item, depth + 1)?;
294 }
295 out.push(b']');
296 }
297 Value::Object(object) => {
298 out.push(b'{');
299 let mut entries: Vec<_> = object.iter().collect();
300 entries.sort_by(|(left, _), (right, _)| cmp_utf16(left, right));
301
302 for (index, (key, item)) in entries.iter().enumerate() {
303 if index > 0 {
304 out.push(b',');
305 }
306 emit_string(out, key, "object property name")?;
307 out.push(b':');
308 emit_value(out, item, depth + 1)?;
309 }
310 out.push(b'}');
311 }
312 }
313 Ok(())
314}
315
316fn emit_number(out: &mut Vec<u8>, number: &Number) -> Result<(), JcsError> {
317 if let Some(value) = number.as_i64() {
318 ensure_exact_binary64_integer(value.unsigned_abs(), &value.to_string())?;
319 out.extend_from_slice(value.to_string().as_bytes());
320 return Ok(());
321 }
322
323 if let Some(value) = number.as_u64() {
324 ensure_exact_binary64_integer(value, &value.to_string())?;
325 out.extend_from_slice(value.to_string().as_bytes());
326 return Ok(());
327 }
328
329 if let Some(value) = number.as_f64() {
330 if !value.is_finite() {
331 return Err(JcsError::InvalidNumber(
332 "encountered a non-finite floating-point number".to_string(),
333 ));
334 }
335
336 let rendered = format_ecmascript_number(value)?;
337 out.extend_from_slice(rendered.as_bytes());
338 return Ok(());
339 }
340
341 Err(JcsError::InvalidNumber(
342 "unsupported JSON number representation".to_string(),
343 ))
344}
345
346fn emit_string(out: &mut Vec<u8>, value: &str, context: &str) -> Result<(), JcsError> {
347 validate_string_contents(value, context).map_err(JcsError::InvalidString)?;
348
349 out.push(b'"');
350 for ch in value.chars() {
351 match ch {
352 '"' => out.extend_from_slice(br#"\""#),
353 '\\' => out.extend_from_slice(br"\\"),
354 '\u{0008}' => out.extend_from_slice(br"\b"),
355 '\u{0009}' => out.extend_from_slice(br"\t"),
356 '\u{000A}' => out.extend_from_slice(br"\n"),
357 '\u{000C}' => out.extend_from_slice(br"\f"),
358 '\u{000D}' => out.extend_from_slice(br"\r"),
359 '\u{0000}'..='\u{001F}' => {
360 let escaped = format!(r"\u{:04x}", ch as u32);
361 out.extend_from_slice(escaped.as_bytes());
362 }
363 _ => {
364 let mut buf = [0u8; 4];
365 let encoded = ch.encode_utf8(&mut buf);
366 out.extend_from_slice(encoded.as_bytes());
367 }
368 }
369 }
370 out.push(b'"');
371
372 Ok(())
373}
374
375fn ensure_exact_binary64_integer(value: u64, original: &str) -> Result<(), JcsError> {
376 if is_exact_binary64_integer(value) {
377 Ok(())
378 } else {
379 Err(JcsError::InvalidNumber(format!(
380 "integer {original} is not exactly representable as an IEEE 754 double; encode it as a string"
381 )))
382 }
383}
384
/// Reports whether `value` fits in a 53-bit significand without rounding.
///
/// That is the case when the distance between its highest and lowest set bits (inclusive) is
/// at most 53 bits; zero is trivially exact.
const fn is_exact_binary64_integer(value: u64) -> bool {
    if value == 0 {
        return true;
    }
    // For a non-zero value the leading and trailing zero counts sum to at most 63, so this
    // subtraction cannot underflow.
    let significant_bits = u64::BITS - value.leading_zeros() - value.trailing_zeros();
    significant_bits <= 53
}
392
393fn format_ecmascript_number(value: f64) -> Result<String, JcsError> {
394 if value == 0.0 {
395 return Ok("0".to_string());
396 }
397
398 let mut buffer = zmij::Buffer::new();
399 let shortest = buffer.format_finite(value);
400 let (negative, body) = if let Some(stripped) = shortest.strip_prefix('-') {
401 (true, stripped)
402 } else {
403 (false, shortest)
404 };
405
406 let (digits, exponent) = parse_shortest_decimal(body)?;
407 let rendered = render_ecmascript_number(&digits, exponent)?;
408
409 if negative {
410 Ok(format!("-{rendered}"))
411 } else {
412 Ok(rendered)
413 }
414}
415
416fn parse_shortest_decimal(body: &str) -> Result<(String, i32), JcsError> {
417 if let Some((mantissa, exponent)) = body.split_once('e') {
418 let digits: String = mantissa.chars().filter(|&ch| ch != '.').collect();
419 let exponent = exponent.parse::<i32>().map_err(|error| {
420 JcsError::InvalidNumber(format!(
421 "failed to parse formatter exponent {exponent:?}: {error}"
422 ))
423 })?;
424 return Ok((digits, exponent + 1));
425 }
426
427 if let Some((integer, fractional)) = body.split_once('.') {
428 let fractional = fractional.trim_end_matches('0');
429
430 if integer != "0" {
431 let mut digits = String::with_capacity(integer.len() + fractional.len());
432 digits.push_str(integer);
433 digits.push_str(fractional);
434 let exponent = i32::try_from(integer.len()).map_err(|_| {
435 JcsError::InvalidNumber(
436 "formatter emitted an unexpectedly large integer part".to_string(),
437 )
438 })?;
439 return Ok((digits, exponent));
440 }
441
442 let leading_zeros = fractional.bytes().take_while(|&byte| byte == b'0').count();
443 let exponent = i32::try_from(leading_zeros).map_err(|_| {
444 JcsError::InvalidNumber(
445 "formatter emitted an unexpectedly long leading-zero run".to_string(),
446 )
447 })?;
448 return Ok((fractional[leading_zeros..].to_owned(), -exponent));
449 }
450
451 let exponent = i32::try_from(body.len()).map_err(|_| {
452 JcsError::InvalidNumber("formatter emitted an unexpectedly long integer".to_string())
453 })?;
454 Ok((body.to_owned(), exponent))
455}
456
457fn render_ecmascript_number(digits: &str, exponent: i32) -> Result<String, JcsError> {
458 let digits_len = i32::try_from(digits.len()).map_err(|_| {
459 JcsError::InvalidNumber("formatter emitted an unexpectedly long digit sequence".to_string())
460 })?;
461 if digits_len == 0 {
462 return Err(JcsError::InvalidNumber("empty digit sequence".to_string()));
463 }
464
465 if digits_len <= exponent && exponent <= 21 {
466 let capacity = usize::try_from(exponent).map_err(|_| {
467 JcsError::InvalidNumber(
468 "formatter produced a negative fixed-width exponent".to_string(),
469 )
470 })?;
471 let mut out = String::with_capacity(capacity);
472 out.push_str(digits);
473 for _ in 0..(exponent - digits_len) {
474 out.push('0');
475 }
476 return Ok(out);
477 }
478
479 if 0 < exponent && exponent <= 21 {
480 let split = usize::try_from(exponent).map_err(|_| {
481 JcsError::InvalidNumber("formatter produced a negative split exponent".to_string())
482 })?;
483 let mut out = String::with_capacity(digits.len() + 1);
484 out.push_str(&digits[..split]);
485 out.push('.');
486 out.push_str(&digits[split..]);
487 return Ok(out);
488 }
489
490 if -6 < exponent && exponent <= 0 {
491 let zeros = usize::try_from(-exponent).map_err(|_| {
492 JcsError::InvalidNumber("formatter produced an invalid negative exponent".to_string())
493 })?;
494 let mut out = String::with_capacity(2 + zeros + digits.len());
495 out.push_str("0.");
496 for _ in 0..zeros {
497 out.push('0');
498 }
499 out.push_str(digits);
500 return Ok(out);
501 }
502
503 let exponent = exponent - 1;
504 let (first, rest) = digits.split_at(1);
505 let mut out = String::with_capacity(digits.len() + 6);
506 out.push_str(first);
507 if !rest.is_empty() {
508 out.push('.');
509 out.push_str(rest);
510 }
511 out.push('e');
512 if exponent >= 0 {
513 out.push('+');
514 }
515 out.push_str(&exponent.to_string());
516 Ok(out)
517}
518
/// Orders two strings by their UTF-16 code units (lexicographically, shorter prefix first).
fn cmp_utf16(left: &str, right: &str) -> Ordering {
    let mut lhs = left.encode_utf16();
    let mut rhs = right.encode_utf16();
    loop {
        match (lhs.next(), rhs.next()) {
            (None, None) => return Ordering::Equal,
            (None, Some(_)) => return Ordering::Less,
            (Some(_), None) => return Ordering::Greater,
            (Some(a), Some(b)) => match a.cmp(&b) {
                Ordering::Equal => {}
                decided => return decided,
            },
        }
    }
}
522
/// Reports whether `ch` is a Unicode noncharacter: U+FDD0..=U+FDEF, or a code point whose low
/// 16 bits are `FFFE` or `FFFF`.
fn is_noncharacter(ch: char) -> bool {
    let code = u32::from(ch);
    let in_fdd0_block = matches!(code, 0xFDD0..=0xFDEF);
    // A `char` never exceeds U+10FFFF, so only the low 16 bits need checking.
    let ends_plane = (code & 0xFFFF) >= 0xFFFE;
    in_fdd0_block || ends_plane
}
527
// Prefix of the custom serde error raised by `NoDuplicateValueSeed` when the depth limit is hit.
// `parse_json_value_no_duplicates` matches on this prefix to turn that error back into
// `JcsError::NestingDepthExceeded`, so the two must stay in sync.
const DEPTH_EXCEEDED_SENTINEL: &str = "nesting depth exceeded maximum of ";
532
533fn parse_json_value_no_duplicates(json: &[u8]) -> Result<Value, JcsError> {
534 let mut deserializer = serde_json::Deserializer::from_slice(json);
535 deserializer.disable_recursion_limit();
538 let value = deserialize_json_value_no_duplicates(&mut deserializer).map_err(|e| {
539 if e.to_string().starts_with(DEPTH_EXCEEDED_SENTINEL) {
540 JcsError::NestingDepthExceeded
541 } else {
542 JcsError::Json(e)
543 }
544 })?;
545 deserializer.end()?;
546 Ok(value)
547}
548
549struct NoDuplicateValueSeed {
550 depth: usize,
551}
552
553impl<'de> DeserializeSeed<'de> for NoDuplicateValueSeed {
554 type Value = Value;
555
556 fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
557 where
558 D: Deserializer<'de>,
559 {
560 if self.depth > MAX_NESTING_DEPTH {
561 return Err(D::Error::custom(format!(
562 "{DEPTH_EXCEEDED_SENTINEL}{MAX_NESTING_DEPTH}"
563 )));
564 }
565 deserializer.deserialize_any(NoDuplicateValueVisitor { depth: self.depth })
566 }
567}
568
569struct NoDuplicateValueVisitor {
570 depth: usize,
571}
572
573impl<'de> Visitor<'de> for NoDuplicateValueVisitor {
574 type Value = Value;
575
576 fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
577 formatter.write_str("a valid JSON value")
578 }
579
580 fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E> {
581 Ok(Value::Bool(value))
582 }
583
584 fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E> {
585 Ok(Value::Number(Number::from(value)))
586 }
587
588 fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> {
589 Ok(Value::Number(Number::from(value)))
590 }
591
592 fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
593 where
594 E: de::Error,
595 {
596 Number::from_f64(value)
597 .map(Value::Number)
598 .ok_or_else(|| E::custom("encountered a non-finite floating-point number"))
599 }
600
601 fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
602 where
603 E: de::Error,
604 {
605 validate_string_contents(value, "string value").map_err(E::custom)?;
606 Ok(Value::String(value.to_owned()))
607 }
608
609 fn visit_borrowed_str<E>(self, value: &'de str) -> Result<Self::Value, E>
610 where
611 E: de::Error,
612 {
613 self.visit_str(value)
614 }
615
616 fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
617 where
618 E: de::Error,
619 {
620 validate_string_contents(&value, "string value").map_err(E::custom)?;
621 Ok(Value::String(value))
622 }
623
624 fn visit_none<E>(self) -> Result<Self::Value, E> {
625 Ok(Value::Null)
626 }
627
628 fn visit_unit<E>(self) -> Result<Self::Value, E> {
629 Ok(Value::Null)
630 }
631
632 fn visit_seq<A>(self, mut access: A) -> Result<Self::Value, A::Error>
633 where
634 A: SeqAccess<'de>,
635 {
636 let mut values = Vec::with_capacity(access.size_hint().unwrap_or(0));
637 while let Some(value) = access.next_element_seed(NoDuplicateValueSeed {
638 depth: self.depth + 1,
639 })? {
640 values.push(value);
641 }
642 Ok(Value::Array(values))
643 }
644
645 fn visit_map<A>(self, mut access: A) -> Result<Self::Value, A::Error>
646 where
647 A: MapAccess<'de>,
648 {
649 let Some(first_key) = access.next_key::<String>()? else {
650 return Ok(Value::Object(serde_json::Map::new()));
651 };
652
653 if !first_key.starts_with('$') {
656 validate_string_contents(&first_key, "object property name")
657 .map_err(A::Error::custom)?;
658 }
659
660 let first_value = access.next_value_seed(NoDuplicateValueSeed {
661 depth: self.depth + 1,
662 })?;
663
664 let mut object = serde_json::Map::new();
665 object.insert(first_key.clone(), first_value);
666
667 let mut seen = BTreeSet::new();
668 seen.insert(first_key);
669
670 while let Some(key) = access.next_key::<String>()? {
671 if !key.starts_with('$') {
675 validate_string_contents(&key, "object property name").map_err(A::Error::custom)?;
676 }
677
678 if !seen.insert(key.clone()) {
679 return Err(A::Error::custom(format!("duplicate property name `{key}`")));
680 }
681
682 let value = access.next_value_seed(NoDuplicateValueSeed {
683 depth: self.depth + 1,
684 })?;
685 object.insert(key, value);
686 }
687
688 serde_json::from_value(Value::Object(object)).map_err(A::Error::custom)
692 }
693}
694
// Unit tests live in the sibling file `lib_tests.rs` and are compiled only for test builds.
#[cfg(test)]
#[path = "lib_tests.rs"]
mod tests;