Skip to main content

vr_jcs/
lib.rs

1//! # vr-jcs
2//!
3//! RFC 8785 JSON Canonicalization Scheme (JCS) for Rust.
4//!
5//! Produces canonical JSON suitable for deterministic digest computation,
6//! content hashing, and stable serialization boundaries. Implements the
7//! RFC 8785 rules that materially affect wire compatibility:
8//! - UTF-16 code-unit sorting for object property names
9//! - ECMAScript-compatible primitive serialization
10//! - UTF-8 output without insignificant whitespace
11//! - duplicate-property rejection on raw JSON parse paths
12//! - I-JSON string / number validation
13//!
14//! ## API
15//!
16//! - [`to_canon_bytes`] — Serialize any `Serialize` type to canonical JSON bytes
17//! - [`to_canon_string`] — Serialize any `Serialize` type to a canonical JSON string
18//! - [`to_canon_bytes_from_slice`] — Parse raw JSON and return canonical bytes (rejects duplicates)
19//! - [`to_canon_string_from_str`] — Parse raw JSON string and return canonical string
20//! - [`canonicalize`] — Sort object keys recursively in a `serde_json::Value` (in-place)
21//!
22//! ## Usage
23//!
24//! ```
25//! use vr_jcs::to_canon_string;
26//! use serde::Serialize;
27//!
28//! #[derive(Serialize)]
29//! struct Receipt {
30//!     z_field: u64,
31//!     a_field: u64,
32//! }
33//!
34//! let receipt = Receipt { z_field: 1, a_field: 2 };
35//! let json = to_canon_string(&receipt).expect("serialization");
36//! assert_eq!(json, r#"{"a_field":2,"z_field":1}"#);
37//! ```
38
39#![deny(clippy::unwrap_used)]
40#![deny(clippy::expect_used)]
41#![deny(clippy::panic)]
42#![warn(missing_docs)]
43
44use std::cmp::Ordering;
45use std::collections::BTreeSet;
46
47use serde::de::{self, DeserializeSeed, Error as DeError, MapAccess, SeqAccess, Visitor};
48use serde::{Deserializer, Serialize};
49use serde_json::{Number, Value};
50
51/// Error type for canonical JSON operations.
///
/// Each variant identifies which stage of canonicalization rejected the
/// input. The `String` payloads are human-readable messages that are shown
/// verbatim by the `Display` implementation.
52#[derive(Debug)]
53pub enum JcsError {
54    /// JSON serialization or deserialization failed.
    ///
    /// Wraps the underlying `serde_json::Error`. On the raw-JSON parse paths
    /// this also covers malformed input and duplicate property names.
55    Json(serde_json::Error),
56    /// A JSON string violated I-JSON constraints.
    ///
    /// Raised for strings or property names containing a forbidden Unicode
    /// noncharacter, and for canonical output that is not valid UTF-8.
57    InvalidString(String),
58    /// A JSON number violated JCS / I-JSON constraints.
    ///
    /// Raised for non-finite floats, for integers that are not exactly
    /// representable as an IEEE 754 double, and for formatter output that
    /// could not be interpreted.
59    InvalidNumber(String),
60}
61
62impl std::fmt::Display for JcsError {
63    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64        match self {
65            Self::Json(e) => write!(f, "JCS JSON processing failed: {e}"),
66            Self::InvalidString(msg) => write!(f, "JCS string validation failed: {msg}"),
67            Self::InvalidNumber(msg) => write!(f, "JCS number validation failed: {msg}"),
68        }
69    }
70}
71
72impl std::error::Error for JcsError {
73    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
74        match self {
75            Self::Json(e) => Some(e),
76            Self::InvalidString(_) | Self::InvalidNumber(_) => None,
77        }
78    }
79}
80
81impl From<serde_json::Error> for JcsError {
82    fn from(error: serde_json::Error) -> Self {
83        Self::Json(error)
84    }
85}
86
87// ── Public API ─────────────────────────────────────────────────────
88
89/// Serialize any `Serialize` type to canonical JSON bytes.
90///
91/// Suitable for digest and signature inputs where deterministic output is required.
92///
93/// # Errors
94///
95/// Returns:
96/// - [`JcsError::Json`] if serialization to JSON fails
97/// - [`JcsError::InvalidString`] if a string contains an I-JSON forbidden code point
98/// - [`JcsError::InvalidNumber`] if a number is not interoperable under JCS
99pub fn to_canon_bytes<T: Serialize>(value: &T) -> Result<Vec<u8>, JcsError> {
100    let value = serde_json::to_value(value)?;
101    to_canon_bytes_value(&value)
102}
103
104/// Serialize any `Serialize` type to a canonical JSON string.
105///
106/// # Errors
107///
108/// Returns the same errors as [`to_canon_bytes`].
109pub fn to_canon_string<T: Serialize>(value: &T) -> Result<String, JcsError> {
110    let bytes = to_canon_bytes(value)?;
111    String::from_utf8(bytes).map_err(|error| {
112        JcsError::InvalidString(format!(
113            "canonical JSON output was not valid UTF-8: {error}"
114        ))
115    })
116}
117
118/// Parse raw JSON text and return canonical JSON bytes.
119///
120/// Unlike [`to_canon_bytes`], this function rejects duplicate property names
121/// because it sees the original JSON syntax before it is collapsed into
122/// `serde_json::Value`.
123///
124/// # Errors
125///
126/// Returns the same errors as [`to_canon_bytes`], plus [`JcsError::Json`] for
127/// malformed JSON or duplicate property names.
128pub fn to_canon_bytes_from_slice(json: &[u8]) -> Result<Vec<u8>, JcsError> {
129    let value = parse_json_value_no_duplicates(json)?;
130    to_canon_bytes_value(&value)
131}
132
133/// Parse raw JSON text and return a canonical JSON string.
134///
135/// # Errors
136///
137/// Returns the same errors as [`to_canon_bytes_from_slice`].
138pub fn to_canon_string_from_str(json: &str) -> Result<String, JcsError> {
139    let bytes = to_canon_bytes_from_slice(json.as_bytes())?;
140    String::from_utf8(bytes).map_err(|error| {
141        JcsError::InvalidString(format!(
142            "canonical JSON output was not valid UTF-8: {error}"
143        ))
144    })
145}
146
147/// Recursively sort all object keys in a JSON value for canonical representation.
148///
149/// This function modifies the value in place, sorting all object keys
150/// by UTF-16 code units (RFC 8785) and recursively processing nested
151/// structures. Array element order is preserved.
152///
153/// For digest computation, prefer [`to_canon_bytes`] which handles the
154/// full RFC 8785 pipeline including number rendering and string validation.
155pub fn canonicalize(v: &mut Value) {
156    match v {
157        Value::Object(map) => {
158            let keys: Vec<String> = map.keys().cloned().collect();
159            let mut entries: Vec<(String, Value)> = keys
160                .into_iter()
161                .filter_map(|k| map.remove(&k).map(|v| (k, v)))
162                .collect();
163            entries.sort_by(|(a, _), (b, _)| cmp_utf16(a, b));
164            for (key, mut value) in entries {
165                canonicalize(&mut value);
166                map.insert(key, value);
167            }
168        }
169        Value::Array(arr) => {
170            for x in arr {
171                canonicalize(x);
172            }
173        }
174        _ => {}
175    }
176}
177
178// ── Provisional helpers for sibling crates ─────────────────────────
179//
180// Not part of the stable v0.2 API. Subject to change or removal
181// without semver bump. If still needed at publish time, these will
182// be gated behind `feature = "unstable"`.
183
184/// Deserialize a JSON value while rejecting duplicate property names.
185///
186/// Used by `vertrule-schemas` for ingestion validation.
187///
188/// # Errors
189///
190/// Returns an error if the input contains duplicate property names,
191/// forbidden noncharacters, or is otherwise invalid JSON.
192#[doc(hidden)]
193pub fn deserialize_json_value_no_duplicates<'de, D>(deserializer: D) -> Result<Value, D::Error>
194where
195    D: Deserializer<'de>,
196{
197    NoDuplicateValueSeed.deserialize(deserializer)
198}
199
200/// Validate that a string contains no I-JSON forbidden noncharacters.
201///
202/// # Errors
203///
204/// Returns a description of the violation if the string contains a
205/// forbidden Unicode noncharacter (U+FDD0..U+FDEF, U+xFFFE, U+xFFFF).
206#[doc(hidden)]
207pub fn validate_string_contents(value: &str, context: &str) -> Result<(), String> {
208    if let Some(ch) = value.chars().find(|&ch| is_noncharacter(ch)) {
209        return Err(format!(
210            "{context} contains the forbidden noncharacter U+{:04X}",
211            ch as u32
212        ));
213    }
214    Ok(())
215}
216
217/// Check if an integer is in the I-JSON safe integer range `[-2^53+1, 2^53-1]`.
218#[doc(hidden)]
219#[must_use]
220pub fn is_safe_integer(value: i64) -> bool {
221    (-MAX_SAFE_INTEGER..=MAX_SAFE_INTEGER).contains(&value)
222}
223
224// ── Internal implementation ────────────────────────────────────────
225
/// Largest magnitude of the I-JSON "safe integer" range: `2^53 - 1`.
const MAX_SAFE_INTEGER: i64 = (1 << 53) - 1;
227
228fn to_canon_bytes_value(value: &Value) -> Result<Vec<u8>, JcsError> {
229    let mut out = Vec::new();
230    emit_value(&mut out, value)?;
231    Ok(out)
232}
233
234fn emit_value(out: &mut Vec<u8>, value: &Value) -> Result<(), JcsError> {
235    match value {
236        Value::Null => out.extend_from_slice(b"null"),
237        Value::Bool(boolean) => {
238            if *boolean {
239                out.extend_from_slice(b"true");
240            } else {
241                out.extend_from_slice(b"false");
242            }
243        }
244        Value::Number(number) => emit_number(out, number)?,
245        Value::String(string) => emit_string(out, string, "string value")?,
246        Value::Array(array) => {
247            out.push(b'[');
248            for (index, item) in array.iter().enumerate() {
249                if index > 0 {
250                    out.push(b',');
251                }
252                emit_value(out, item)?;
253            }
254            out.push(b']');
255        }
256        Value::Object(object) => {
257            out.push(b'{');
258            let mut entries: Vec<_> = object.iter().collect();
259            entries.sort_by(|(left, _), (right, _)| cmp_utf16(left, right));
260
261            for (index, (key, item)) in entries.iter().enumerate() {
262                if index > 0 {
263                    out.push(b',');
264                }
265                emit_string(out, key, "object property name")?;
266                out.push(b':');
267                emit_value(out, item)?;
268            }
269            out.push(b'}');
270        }
271    }
272    Ok(())
273}
274
275fn emit_number(out: &mut Vec<u8>, number: &Number) -> Result<(), JcsError> {
276    if let Some(value) = number.as_i64() {
277        ensure_exact_binary64_integer(value.unsigned_abs(), &value.to_string())?;
278        out.extend_from_slice(value.to_string().as_bytes());
279        return Ok(());
280    }
281
282    if let Some(value) = number.as_u64() {
283        ensure_exact_binary64_integer(value, &value.to_string())?;
284        out.extend_from_slice(value.to_string().as_bytes());
285        return Ok(());
286    }
287
288    if let Some(value) = number.as_f64() {
289        if !value.is_finite() {
290            return Err(JcsError::InvalidNumber(
291                "encountered a non-finite floating-point number".to_string(),
292            ));
293        }
294
295        let rendered = format_ecmascript_number(value)?;
296        out.extend_from_slice(rendered.as_bytes());
297        return Ok(());
298    }
299
300    Err(JcsError::InvalidNumber(
301        "unsupported JSON number representation".to_string(),
302    ))
303}
304
305fn emit_string(out: &mut Vec<u8>, value: &str, context: &str) -> Result<(), JcsError> {
306    validate_string_contents(value, context).map_err(JcsError::InvalidString)?;
307
308    out.push(b'"');
309    for ch in value.chars() {
310        match ch {
311            '"' => out.extend_from_slice(br#"\""#),
312            '\\' => out.extend_from_slice(br"\\"),
313            '\u{0008}' => out.extend_from_slice(br"\b"),
314            '\u{0009}' => out.extend_from_slice(br"\t"),
315            '\u{000A}' => out.extend_from_slice(br"\n"),
316            '\u{000C}' => out.extend_from_slice(br"\f"),
317            '\u{000D}' => out.extend_from_slice(br"\r"),
318            '\u{0000}'..='\u{001F}' => {
319                let escaped = format!(r"\u{:04x}", ch as u32);
320                out.extend_from_slice(escaped.as_bytes());
321            }
322            _ => {
323                let mut buf = [0u8; 4];
324                let encoded = ch.encode_utf8(&mut buf);
325                out.extend_from_slice(encoded.as_bytes());
326            }
327        }
328    }
329    out.push(b'"');
330
331    Ok(())
332}
333
334fn ensure_exact_binary64_integer(value: u64, original: &str) -> Result<(), JcsError> {
335    if is_exact_binary64_integer(value) {
336        Ok(())
337    } else {
338        Err(JcsError::InvalidNumber(format!(
339            "integer {original} is not exactly representable as an IEEE 754 double; encode it as a string"
340        )))
341    }
342}
343
/// Returns `true` when `value` fits losslessly into an IEEE 754 double.
///
/// A double has a 53-bit significand, so an integer is exact precisely when
/// the span from its highest to its lowest set bit is at most 53 bits wide.
/// Zero is trivially exact.
const fn is_exact_binary64_integer(value: u64) -> bool {
    if value == 0 {
        return true;
    }
    // For a non-zero value the leading and trailing zero counts sum to at
    // most 63, so the subtraction cannot underflow.
    let significant_bits = u64::BITS - value.leading_zeros() - value.trailing_zeros();
    significant_bits <= 53
}
351
352fn format_ecmascript_number(value: f64) -> Result<String, JcsError> {
353    if value == 0.0 {
354        return Ok("0".to_string());
355    }
356
357    let mut buffer = zmij::Buffer::new();
358    let shortest = buffer.format_finite(value);
359    let (negative, body) = if let Some(stripped) = shortest.strip_prefix('-') {
360        (true, stripped)
361    } else {
362        (false, shortest)
363    };
364
365    let (digits, exponent) = parse_shortest_decimal(body)?;
366    let rendered = render_ecmascript_number(&digits, exponent)?;
367
368    if negative {
369        Ok(format!("-{rendered}"))
370    } else {
371        Ok(rendered)
372    }
373}
374
375fn parse_shortest_decimal(body: &str) -> Result<(String, i32), JcsError> {
376    if let Some((mantissa, exponent)) = body.split_once('e') {
377        let digits: String = mantissa.chars().filter(|&ch| ch != '.').collect();
378        let exponent = exponent.parse::<i32>().map_err(|error| {
379            JcsError::InvalidNumber(format!(
380                "failed to parse formatter exponent {exponent:?}: {error}"
381            ))
382        })?;
383        return Ok((digits, exponent + 1));
384    }
385
386    if let Some((integer, fractional)) = body.split_once('.') {
387        let fractional = fractional.trim_end_matches('0');
388
389        if integer != "0" {
390            let mut digits = String::with_capacity(integer.len() + fractional.len());
391            digits.push_str(integer);
392            digits.push_str(fractional);
393            let exponent = i32::try_from(integer.len()).map_err(|_| {
394                JcsError::InvalidNumber(
395                    "formatter emitted an unexpectedly large integer part".to_string(),
396                )
397            })?;
398            return Ok((digits, exponent));
399        }
400
401        let leading_zeros = fractional.bytes().take_while(|&byte| byte == b'0').count();
402        let exponent = i32::try_from(leading_zeros).map_err(|_| {
403            JcsError::InvalidNumber(
404                "formatter emitted an unexpectedly long leading-zero run".to_string(),
405            )
406        })?;
407        return Ok((fractional[leading_zeros..].to_owned(), -exponent));
408    }
409
410    let exponent = i32::try_from(body.len()).map_err(|_| {
411        JcsError::InvalidNumber("formatter emitted an unexpectedly long integer".to_string())
412    })?;
413    Ok((body.to_owned(), exponent))
414}
415
416fn render_ecmascript_number(digits: &str, exponent: i32) -> Result<String, JcsError> {
417    let digits_len = i32::try_from(digits.len()).map_err(|_| {
418        JcsError::InvalidNumber("formatter emitted an unexpectedly long digit sequence".to_string())
419    })?;
420    if digits_len == 0 {
421        return Err(JcsError::InvalidNumber("empty digit sequence".to_string()));
422    }
423
424    if digits_len <= exponent && exponent <= 21 {
425        let capacity = usize::try_from(exponent).map_err(|_| {
426            JcsError::InvalidNumber(
427                "formatter produced a negative fixed-width exponent".to_string(),
428            )
429        })?;
430        let mut out = String::with_capacity(capacity);
431        out.push_str(digits);
432        for _ in 0..(exponent - digits_len) {
433            out.push('0');
434        }
435        return Ok(out);
436    }
437
438    if 0 < exponent && exponent <= 21 {
439        let split = usize::try_from(exponent).map_err(|_| {
440            JcsError::InvalidNumber("formatter produced a negative split exponent".to_string())
441        })?;
442        let mut out = String::with_capacity(digits.len() + 1);
443        out.push_str(&digits[..split]);
444        out.push('.');
445        out.push_str(&digits[split..]);
446        return Ok(out);
447    }
448
449    if -6 < exponent && exponent <= 0 {
450        let zeros = usize::try_from(-exponent).map_err(|_| {
451            JcsError::InvalidNumber("formatter produced an invalid negative exponent".to_string())
452        })?;
453        let mut out = String::with_capacity(2 + zeros + digits.len());
454        out.push_str("0.");
455        for _ in 0..zeros {
456            out.push('0');
457        }
458        out.push_str(digits);
459        return Ok(out);
460    }
461
462    let exponent = exponent - 1;
463    let (first, rest) = digits.split_at(1);
464    let mut out = String::with_capacity(digits.len() + 6);
465    out.push_str(first);
466    if !rest.is_empty() {
467        out.push('.');
468        out.push_str(rest);
469    }
470    out.push('e');
471    if exponent >= 0 {
472        out.push('+');
473    }
474    out.push_str(&exponent.to_string());
475    Ok(out)
476}
477
/// Orders two strings by their UTF-16 code units, as RFC 8785 requires for
/// object property names.
///
/// This differs from Rust's default byte-wise `str` ordering for characters
/// outside the Basic Multilingual Plane, whose surrogate code units sort
/// below U+E000..=U+FFFF.
fn cmp_utf16(left: &str, right: &str) -> Ordering {
    Iterator::cmp(left.encode_utf16(), right.encode_utf16())
}
481
/// Returns `true` for Unicode noncharacters: U+FDD0..=U+FDEF and every code
/// point whose low 16 bits are `FFFE` or `FFFF`.
fn is_noncharacter(ch: char) -> bool {
    let code = u32::from(ch);
    matches!(code, 0xFDD0..=0xFDEF) || matches!(code & 0xFFFF, 0xFFFE | 0xFFFF)
}
486
487fn parse_json_value_no_duplicates(json: &[u8]) -> Result<Value, serde_json::Error> {
488    let mut deserializer = serde_json::Deserializer::from_slice(json);
489    let value = deserialize_json_value_no_duplicates(&mut deserializer)?;
490    deserializer.end()?;
491    Ok(value)
492}
493
494struct NoDuplicateValueSeed;
495
496impl<'de> DeserializeSeed<'de> for NoDuplicateValueSeed {
497    type Value = Value;
498
499    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
500    where
501        D: Deserializer<'de>,
502    {
503        deserializer.deserialize_any(NoDuplicateValueVisitor)
504    }
505}
506
507struct NoDuplicateValueVisitor;
508
509impl<'de> Visitor<'de> for NoDuplicateValueVisitor {
510    type Value = Value;
511
512    fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
513        formatter.write_str("a valid JSON value")
514    }
515
516    fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E> {
517        Ok(Value::Bool(value))
518    }
519
520    fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E> {
521        Ok(Value::Number(Number::from(value)))
522    }
523
524    fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> {
525        Ok(Value::Number(Number::from(value)))
526    }
527
528    fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
529    where
530        E: de::Error,
531    {
532        Number::from_f64(value)
533            .map(Value::Number)
534            .ok_or_else(|| E::custom("encountered a non-finite floating-point number"))
535    }
536
537    fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
538    where
539        E: de::Error,
540    {
541        validate_string_contents(value, "string value").map_err(E::custom)?;
542        Ok(Value::String(value.to_owned()))
543    }
544
545    fn visit_borrowed_str<E>(self, value: &'de str) -> Result<Self::Value, E>
546    where
547        E: de::Error,
548    {
549        self.visit_str(value)
550    }
551
552    fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
553    where
554        E: de::Error,
555    {
556        validate_string_contents(&value, "string value").map_err(E::custom)?;
557        Ok(Value::String(value))
558    }
559
560    fn visit_none<E>(self) -> Result<Self::Value, E> {
561        Ok(Value::Null)
562    }
563
564    fn visit_unit<E>(self) -> Result<Self::Value, E> {
565        Ok(Value::Null)
566    }
567
568    fn visit_seq<A>(self, mut access: A) -> Result<Self::Value, A::Error>
569    where
570        A: SeqAccess<'de>,
571    {
572        let mut values = Vec::with_capacity(access.size_hint().unwrap_or(0));
573        while let Some(value) = access.next_element_seed(NoDuplicateValueSeed)? {
574            values.push(value);
575        }
576        Ok(Value::Array(values))
577    }
578
579    fn visit_map<A>(self, mut access: A) -> Result<Self::Value, A::Error>
580    where
581        A: MapAccess<'de>,
582    {
583        let Some(first_key) = access.next_key::<String>()? else {
584            return Ok(Value::Object(serde_json::Map::new()));
585        };
586
587        // Validate first key (skip internal '$'-prefixed keys used by
588        // serde_json for number representations under arbitrary_precision).
589        if !first_key.starts_with('$') {
590            validate_string_contents(&first_key, "object property name")
591                .map_err(A::Error::custom)?;
592        }
593
594        let first_value = access.next_value_seed(NoDuplicateValueSeed)?;
595
596        let mut object = serde_json::Map::new();
597        object.insert(first_key.clone(), first_value);
598
599        let mut seen = BTreeSet::new();
600        seen.insert(first_key);
601
602        while let Some(key) = access.next_key::<String>()? {
603            // Only validate user-facing keys (skip internal serde keys
604            // that start with '$'). This handles arbitrary_precision
605            // numbers without depending on private serde_json internals.
606            if !key.starts_with('$') {
607                validate_string_contents(&key, "object property name").map_err(A::Error::custom)?;
608            }
609
610            if !seen.insert(key.clone()) {
611                return Err(A::Error::custom(format!("duplicate property name `{key}`")));
612            }
613
614            let value = access.next_value_seed(NoDuplicateValueSeed)?;
615            object.insert(key, value);
616        }
617
618        // If the map is a serde_json internal number representation,
619        // serde_json::from_value will reconstruct the proper Number.
620        // For real JSON objects, this is a no-op identity conversion.
621        serde_json::from_value(Value::Object(object)).map_err(A::Error::custom)
622    }
623}
624
// Unit tests are kept in the sibling file `lib_tests.rs` (selected by the
// `#[path]` attribute) and are only compiled for test builds.
625#[cfg(test)]
626#[path = "lib_tests.rs"]
627mod tests;