Skip to main content

vr_jcs/
lib.rs

1//! # `VertRule` JCS Glovebox
2//!
3//! **RFC 8785 JSON Canonicalization Scheme (JCS)**
4//!
5//! This crate is the single authorized location for JSON canonicalization
6//! in the `VertRule` ecosystem. All receipt serialization and digest computation
7//! MUST use these functions to ensure deterministic hashing.
8//!
9//! The implementation enforces the RFC 8785 rules that materially affect wire
10//! compatibility:
11//! - UTF-16 code-unit sorting for object property names
12//! - ECMAScript-compatible primitive serialization
13//! - UTF-8 output without insignificant whitespace
14//! - duplicate-property rejection on raw JSON parse paths
15//! - I-JSON string / number validation
16//!
17//! ## API
18//!
19//! - [`to_canon_bytes`] — Serialize any `Serialize` type to canonical JSON bytes
20//! - [`to_canon_string`] — Serialize any `Serialize` type to a canonical JSON string
21//! - [`to_canon_bytes_from_slice`] — Parse raw JSON and return canonical bytes (rejects duplicates)
22//! - [`to_canon_string_from_str`] — Parse raw JSON string and return canonical string
23//! - [`canonicalize`] — Sort object keys recursively in a `serde_json::Value` (in-place)
24//!
25//! ## Usage
26//!
27//! ```
28//! use vr_jcs::to_canon_string;
29//! use serde::Serialize;
30//!
31//! #[derive(Serialize)]
32//! struct Receipt {
33//!     z_field: u64,
34//!     a_field: u64,
35//! }
36//!
37//! let receipt = Receipt { z_field: 1, a_field: 2 };
38//! let json = to_canon_string(&receipt).expect("serialization");
39//! assert_eq!(json, r#"{"a_field":2,"z_field":1}"#);
40//! ```
41//!
42//! ## Enforcement
43//!
44//! Any code path that computes a digest over JSON MUST use this crate.
45//! Using `serde_json::to_string()` directly for digest input is forbidden.
46
47#![deny(clippy::unwrap_used)]
48#![deny(clippy::expect_used)]
49#![deny(clippy::panic)]
50#![warn(missing_docs)]
51
52use std::cmp::Ordering;
53use std::collections::BTreeSet;
54
55use serde::de::{self, DeserializeSeed, Error as DeError, MapAccess, SeqAccess, Visitor};
56use serde::{Deserializer, Serialize};
57use serde_json::{Number, Value};
58
59/// Error type for canonical JSON operations.
60#[derive(Debug)]
61pub enum JcsError {
62    /// JSON serialization or deserialization failed.
63    Json(serde_json::Error),
64    /// A JSON string violated I-JSON constraints.
65    InvalidString(String),
66    /// A JSON number violated JCS / I-JSON constraints.
67    InvalidNumber(String),
68}
69
70impl std::fmt::Display for JcsError {
71    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72        match self {
73            Self::Json(e) => write!(f, "JCS JSON processing failed: {e}"),
74            Self::InvalidString(msg) => write!(f, "JCS string validation failed: {msg}"),
75            Self::InvalidNumber(msg) => write!(f, "JCS number validation failed: {msg}"),
76        }
77    }
78}
79
80impl std::error::Error for JcsError {
81    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
82        match self {
83            Self::Json(e) => Some(e),
84            Self::InvalidString(_) | Self::InvalidNumber(_) => None,
85        }
86    }
87}
88
89impl From<serde_json::Error> for JcsError {
90    fn from(error: serde_json::Error) -> Self {
91        Self::Json(error)
92    }
93}
94
95// ── Public API ─────────────────────────────────────────────────────
96
97/// Serialize any `Serialize` type to canonical JSON bytes.
98///
99/// This is the blessed serializer for all digest and signature inputs.
100///
101/// # Errors
102///
103/// Returns:
104/// - [`JcsError::Json`] if serialization to JSON fails
105/// - [`JcsError::InvalidString`] if a string contains an I-JSON forbidden code point
106/// - [`JcsError::InvalidNumber`] if a number is not interoperable under JCS
107pub fn to_canon_bytes<T: Serialize>(value: &T) -> Result<Vec<u8>, JcsError> {
108    let value = serde_json::to_value(value)?;
109    to_canon_bytes_value(&value)
110}
111
112/// Serialize any `Serialize` type to a canonical JSON string.
113///
114/// # Errors
115///
116/// Returns the same errors as [`to_canon_bytes`].
117pub fn to_canon_string<T: Serialize>(value: &T) -> Result<String, JcsError> {
118    let bytes = to_canon_bytes(value)?;
119    String::from_utf8(bytes).map_err(|error| {
120        JcsError::InvalidString(format!(
121            "canonical JSON output was not valid UTF-8: {error}"
122        ))
123    })
124}
125
126/// Parse raw JSON text and return canonical JSON bytes.
127///
128/// Unlike [`to_canon_bytes`], this function rejects duplicate property names
129/// because it sees the original JSON syntax before it is collapsed into
130/// `serde_json::Value`.
131///
132/// # Errors
133///
134/// Returns the same errors as [`to_canon_bytes`], plus [`JcsError::Json`] for
135/// malformed JSON or duplicate property names.
136pub fn to_canon_bytes_from_slice(json: &[u8]) -> Result<Vec<u8>, JcsError> {
137    let value = parse_json_value_no_duplicates(json)?;
138    to_canon_bytes_value(&value)
139}
140
141/// Parse raw JSON text and return a canonical JSON string.
142///
143/// # Errors
144///
145/// Returns the same errors as [`to_canon_bytes_from_slice`].
146pub fn to_canon_string_from_str(json: &str) -> Result<String, JcsError> {
147    let bytes = to_canon_bytes_from_slice(json.as_bytes())?;
148    String::from_utf8(bytes).map_err(|error| {
149        JcsError::InvalidString(format!(
150            "canonical JSON output was not valid UTF-8: {error}"
151        ))
152    })
153}
154
155/// Recursively sort all object keys in a JSON value for canonical representation.
156///
157/// This function modifies the value in place, sorting all object keys
158/// by UTF-16 code units (RFC 8785) and recursively processing nested
159/// structures. Array element order is preserved.
160///
161/// For digest computation, prefer [`to_canon_bytes`] which handles the
162/// full RFC 8785 pipeline including number rendering and string validation.
163pub fn canonicalize(v: &mut Value) {
164    match v {
165        Value::Object(map) => {
166            let keys: Vec<String> = map.keys().cloned().collect();
167            let mut entries: Vec<(String, Value)> = keys
168                .into_iter()
169                .filter_map(|k| map.remove(&k).map(|v| (k, v)))
170                .collect();
171            entries.sort_by(|(a, _), (b, _)| cmp_utf16(a, b));
172            for (key, mut value) in entries {
173                canonicalize(&mut value);
174                map.insert(key, value);
175            }
176        }
177        Value::Array(arr) => {
178            for x in arr {
179                canonicalize(x);
180            }
181        }
182        _ => {}
183    }
184}
185
186// ── Provisional helpers for sibling crates ─────────────────────────
187//
188// Not part of the stable v0.2 API. Subject to change or removal
189// without semver bump. If still needed at publish time, these will
190// be gated behind `feature = "unstable"`.
191
192/// Deserialize a JSON value while rejecting duplicate property names.
193///
194/// Used by `vertrule-schemas` for ingestion validation.
195///
196/// # Errors
197///
198/// Returns an error if the input contains duplicate property names,
199/// forbidden noncharacters, or is otherwise invalid JSON.
200#[doc(hidden)]
201pub fn deserialize_json_value_no_duplicates<'de, D>(deserializer: D) -> Result<Value, D::Error>
202where
203    D: Deserializer<'de>,
204{
205    NoDuplicateValueSeed.deserialize(deserializer)
206}
207
208/// Validate that a string contains no I-JSON forbidden noncharacters.
209///
210/// # Errors
211///
212/// Returns a description of the violation if the string contains a
213/// forbidden Unicode noncharacter (U+FDD0..U+FDEF, U+xFFFE, U+xFFFF).
214#[doc(hidden)]
215pub fn validate_string_contents(value: &str, context: &str) -> Result<(), String> {
216    if let Some(ch) = value.chars().find(|&ch| is_noncharacter(ch)) {
217        return Err(format!(
218            "{context} contains the forbidden noncharacter U+{:04X}",
219            ch as u32
220        ));
221    }
222    Ok(())
223}
224
/// Check if an integer is in the I-JSON safe integer range `[-2^53+1, 2^53-1]`.
///
/// The range is symmetric, so the check compares magnitudes.
#[doc(hidden)]
#[must_use]
pub fn is_safe_integer(value: i64) -> bool {
    value.unsigned_abs() <= MAX_SAFE_INTEGER.unsigned_abs()
}

// ── Internal implementation ────────────────────────────────────────

/// Upper bound of the I-JSON safe integer range, `2^53 - 1`.
const MAX_SAFE_INTEGER: i64 = (1 << 53) - 1;
235
236fn to_canon_bytes_value(value: &Value) -> Result<Vec<u8>, JcsError> {
237    let mut out = Vec::new();
238    emit_value(&mut out, value)?;
239    Ok(out)
240}
241
242fn emit_value(out: &mut Vec<u8>, value: &Value) -> Result<(), JcsError> {
243    match value {
244        Value::Null => out.extend_from_slice(b"null"),
245        Value::Bool(boolean) => {
246            if *boolean {
247                out.extend_from_slice(b"true");
248            } else {
249                out.extend_from_slice(b"false");
250            }
251        }
252        Value::Number(number) => emit_number(out, number)?,
253        Value::String(string) => emit_string(out, string, "string value")?,
254        Value::Array(array) => {
255            out.push(b'[');
256            for (index, item) in array.iter().enumerate() {
257                if index > 0 {
258                    out.push(b',');
259                }
260                emit_value(out, item)?;
261            }
262            out.push(b']');
263        }
264        Value::Object(object) => {
265            out.push(b'{');
266            let mut entries: Vec<_> = object.iter().collect();
267            entries.sort_by(|(left, _), (right, _)| cmp_utf16(left, right));
268
269            for (index, (key, item)) in entries.iter().enumerate() {
270                if index > 0 {
271                    out.push(b',');
272                }
273                emit_string(out, key, "object property name")?;
274                out.push(b':');
275                emit_value(out, item)?;
276            }
277            out.push(b'}');
278        }
279    }
280    Ok(())
281}
282
283fn emit_number(out: &mut Vec<u8>, number: &Number) -> Result<(), JcsError> {
284    if let Some(value) = number.as_i64() {
285        ensure_exact_binary64_integer(value.unsigned_abs(), &value.to_string())?;
286        out.extend_from_slice(value.to_string().as_bytes());
287        return Ok(());
288    }
289
290    if let Some(value) = number.as_u64() {
291        ensure_exact_binary64_integer(value, &value.to_string())?;
292        out.extend_from_slice(value.to_string().as_bytes());
293        return Ok(());
294    }
295
296    if let Some(value) = number.as_f64() {
297        if !value.is_finite() {
298            return Err(JcsError::InvalidNumber(
299                "encountered a non-finite floating-point number".to_string(),
300            ));
301        }
302
303        let rendered = format_ecmascript_number(value)?;
304        out.extend_from_slice(rendered.as_bytes());
305        return Ok(());
306    }
307
308    Err(JcsError::InvalidNumber(
309        "unsupported JSON number representation".to_string(),
310    ))
311}
312
313fn emit_string(out: &mut Vec<u8>, value: &str, context: &str) -> Result<(), JcsError> {
314    validate_string_contents(value, context).map_err(JcsError::InvalidString)?;
315
316    out.push(b'"');
317    for ch in value.chars() {
318        match ch {
319            '"' => out.extend_from_slice(br#"\""#),
320            '\\' => out.extend_from_slice(br"\\"),
321            '\u{0008}' => out.extend_from_slice(br"\b"),
322            '\u{0009}' => out.extend_from_slice(br"\t"),
323            '\u{000A}' => out.extend_from_slice(br"\n"),
324            '\u{000C}' => out.extend_from_slice(br"\f"),
325            '\u{000D}' => out.extend_from_slice(br"\r"),
326            '\u{0000}'..='\u{001F}' => {
327                let escaped = format!(r"\u{:04x}", ch as u32);
328                out.extend_from_slice(escaped.as_bytes());
329            }
330            _ => {
331                let mut buf = [0u8; 4];
332                let encoded = ch.encode_utf8(&mut buf);
333                out.extend_from_slice(encoded.as_bytes());
334            }
335        }
336    }
337    out.push(b'"');
338
339    Ok(())
340}
341
342fn ensure_exact_binary64_integer(value: u64, original: &str) -> Result<(), JcsError> {
343    if is_exact_binary64_integer(value) {
344        Ok(())
345    } else {
346        Err(JcsError::InvalidNumber(format!(
347            "integer {original} is not exactly representable as an IEEE 754 double; encode it as a string"
348        )))
349    }
350}
351
/// Report whether `value` is exactly representable as an IEEE 754 double.
///
/// A double carries a 53-bit significand, so an integer is exact precisely
/// when the span from its highest to its lowest set bit is at most 53 bits
/// wide. Zero has no set bits and is always exact.
const fn is_exact_binary64_integer(value: u64) -> bool {
    // For zero both counts are 64, so the subtraction saturates to a span of 0.
    let unused_bits = value.leading_zeros() + value.trailing_zeros();
    let significant_span = u64::BITS.saturating_sub(unused_bits);
    significant_span <= 53
}
359
360fn format_ecmascript_number(value: f64) -> Result<String, JcsError> {
361    if value == 0.0 {
362        return Ok("0".to_string());
363    }
364
365    let mut buffer = zmij::Buffer::new();
366    let shortest = buffer.format_finite(value);
367    let (negative, body) = if let Some(stripped) = shortest.strip_prefix('-') {
368        (true, stripped)
369    } else {
370        (false, shortest)
371    };
372
373    let (digits, exponent) = parse_shortest_decimal(body)?;
374    let rendered = render_ecmascript_number(&digits, exponent)?;
375
376    if negative {
377        Ok(format!("-{rendered}"))
378    } else {
379        Ok(rendered)
380    }
381}
382
383fn parse_shortest_decimal(body: &str) -> Result<(String, i32), JcsError> {
384    if let Some((mantissa, exponent)) = body.split_once('e') {
385        let digits: String = mantissa.chars().filter(|&ch| ch != '.').collect();
386        let exponent = exponent.parse::<i32>().map_err(|error| {
387            JcsError::InvalidNumber(format!(
388                "failed to parse formatter exponent {exponent:?}: {error}"
389            ))
390        })?;
391        return Ok((digits, exponent + 1));
392    }
393
394    if let Some((integer, fractional)) = body.split_once('.') {
395        let fractional = fractional.trim_end_matches('0');
396
397        if integer != "0" {
398            let mut digits = String::with_capacity(integer.len() + fractional.len());
399            digits.push_str(integer);
400            digits.push_str(fractional);
401            let exponent = i32::try_from(integer.len()).map_err(|_| {
402                JcsError::InvalidNumber(
403                    "formatter emitted an unexpectedly large integer part".to_string(),
404                )
405            })?;
406            return Ok((digits, exponent));
407        }
408
409        let leading_zeros = fractional.bytes().take_while(|&byte| byte == b'0').count();
410        let exponent = i32::try_from(leading_zeros).map_err(|_| {
411            JcsError::InvalidNumber(
412                "formatter emitted an unexpectedly long leading-zero run".to_string(),
413            )
414        })?;
415        return Ok((fractional[leading_zeros..].to_owned(), -exponent));
416    }
417
418    let exponent = i32::try_from(body.len()).map_err(|_| {
419        JcsError::InvalidNumber("formatter emitted an unexpectedly long integer".to_string())
420    })?;
421    Ok((body.to_owned(), exponent))
422}
423
424fn render_ecmascript_number(digits: &str, exponent: i32) -> Result<String, JcsError> {
425    let digits_len = i32::try_from(digits.len()).map_err(|_| {
426        JcsError::InvalidNumber("formatter emitted an unexpectedly long digit sequence".to_string())
427    })?;
428    if digits_len == 0 {
429        return Err(JcsError::InvalidNumber("empty digit sequence".to_string()));
430    }
431
432    if digits_len <= exponent && exponent <= 21 {
433        let capacity = usize::try_from(exponent).map_err(|_| {
434            JcsError::InvalidNumber(
435                "formatter produced a negative fixed-width exponent".to_string(),
436            )
437        })?;
438        let mut out = String::with_capacity(capacity);
439        out.push_str(digits);
440        for _ in 0..(exponent - digits_len) {
441            out.push('0');
442        }
443        return Ok(out);
444    }
445
446    if 0 < exponent && exponent <= 21 {
447        let split = usize::try_from(exponent).map_err(|_| {
448            JcsError::InvalidNumber("formatter produced a negative split exponent".to_string())
449        })?;
450        let mut out = String::with_capacity(digits.len() + 1);
451        out.push_str(&digits[..split]);
452        out.push('.');
453        out.push_str(&digits[split..]);
454        return Ok(out);
455    }
456
457    if -6 < exponent && exponent <= 0 {
458        let zeros = usize::try_from(-exponent).map_err(|_| {
459            JcsError::InvalidNumber("formatter produced an invalid negative exponent".to_string())
460        })?;
461        let mut out = String::with_capacity(2 + zeros + digits.len());
462        out.push_str("0.");
463        for _ in 0..zeros {
464            out.push('0');
465        }
466        out.push_str(digits);
467        return Ok(out);
468    }
469
470    let exponent = exponent - 1;
471    let (first, rest) = digits.split_at(1);
472    let mut out = String::with_capacity(digits.len() + 6);
473    out.push_str(first);
474    if !rest.is_empty() {
475        out.push('.');
476        out.push_str(rest);
477    }
478    out.push('e');
479    if exponent >= 0 {
480        out.push('+');
481    }
482    out.push_str(&exponent.to_string());
483    Ok(out)
484}
485
/// Order two strings by their UTF-16 code units.
///
/// This differs from Rust's native `str` ordering for characters outside the
/// Basic Multilingual Plane: their surrogate pairs sort below U+E000..U+FFFF.
fn cmp_utf16(left: &str, right: &str) -> Ordering {
    Iterator::cmp(left.encode_utf16(), right.encode_utf16())
}
489
/// Report whether `ch` is a Unicode noncharacter: U+FDD0..=U+FDEF, or the
/// last two code points (`FFFE` / `FFFF`) of any plane.
fn is_noncharacter(ch: char) -> bool {
    let code = u32::from(ch);
    matches!(code, 0xFDD0..=0xFDEF) || matches!(code & 0xFFFF, 0xFFFE | 0xFFFF)
}
494
495fn parse_json_value_no_duplicates(json: &[u8]) -> Result<Value, serde_json::Error> {
496    let mut deserializer = serde_json::Deserializer::from_slice(json);
497    let value = deserialize_json_value_no_duplicates(&mut deserializer)?;
498    deserializer.end()?;
499    Ok(value)
500}
501
502struct NoDuplicateValueSeed;
503
504impl<'de> DeserializeSeed<'de> for NoDuplicateValueSeed {
505    type Value = Value;
506
507    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
508    where
509        D: Deserializer<'de>,
510    {
511        deserializer.deserialize_any(NoDuplicateValueVisitor)
512    }
513}
514
515struct NoDuplicateValueVisitor;
516
517impl<'de> Visitor<'de> for NoDuplicateValueVisitor {
518    type Value = Value;
519
520    fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
521        formatter.write_str("a valid JSON value")
522    }
523
524    fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E> {
525        Ok(Value::Bool(value))
526    }
527
528    fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E> {
529        Ok(Value::Number(Number::from(value)))
530    }
531
532    fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> {
533        Ok(Value::Number(Number::from(value)))
534    }
535
536    fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
537    where
538        E: de::Error,
539    {
540        Number::from_f64(value)
541            .map(Value::Number)
542            .ok_or_else(|| E::custom("encountered a non-finite floating-point number"))
543    }
544
545    fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
546    where
547        E: de::Error,
548    {
549        validate_string_contents(value, "string value").map_err(E::custom)?;
550        Ok(Value::String(value.to_owned()))
551    }
552
553    fn visit_borrowed_str<E>(self, value: &'de str) -> Result<Self::Value, E>
554    where
555        E: de::Error,
556    {
557        self.visit_str(value)
558    }
559
560    fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
561    where
562        E: de::Error,
563    {
564        validate_string_contents(&value, "string value").map_err(E::custom)?;
565        Ok(Value::String(value))
566    }
567
568    fn visit_none<E>(self) -> Result<Self::Value, E> {
569        Ok(Value::Null)
570    }
571
572    fn visit_unit<E>(self) -> Result<Self::Value, E> {
573        Ok(Value::Null)
574    }
575
576    fn visit_seq<A>(self, mut access: A) -> Result<Self::Value, A::Error>
577    where
578        A: SeqAccess<'de>,
579    {
580        let mut values = Vec::with_capacity(access.size_hint().unwrap_or(0));
581        while let Some(value) = access.next_element_seed(NoDuplicateValueSeed)? {
582            values.push(value);
583        }
584        Ok(Value::Array(values))
585    }
586
587    fn visit_map<A>(self, mut access: A) -> Result<Self::Value, A::Error>
588    where
589        A: MapAccess<'de>,
590    {
591        let Some(first_key) = access.next_key::<String>()? else {
592            return Ok(Value::Object(serde_json::Map::new()));
593        };
594
595        // Validate first key (skip internal '$'-prefixed keys used by
596        // serde_json for number representations under arbitrary_precision).
597        if !first_key.starts_with('$') {
598            validate_string_contents(&first_key, "object property name")
599                .map_err(A::Error::custom)?;
600        }
601
602        let first_value = access.next_value_seed(NoDuplicateValueSeed)?;
603
604        let mut object = serde_json::Map::new();
605        object.insert(first_key.clone(), first_value);
606
607        let mut seen = BTreeSet::new();
608        seen.insert(first_key);
609
610        while let Some(key) = access.next_key::<String>()? {
611            // Only validate user-facing keys (skip internal serde keys
612            // that start with '$'). This handles arbitrary_precision
613            // numbers without depending on private serde_json internals.
614            if !key.starts_with('$') {
615                validate_string_contents(&key, "object property name").map_err(A::Error::custom)?;
616            }
617
618            if !seen.insert(key.clone()) {
619                return Err(A::Error::custom(format!("duplicate property name `{key}`")));
620            }
621
622            let value = access.next_value_seed(NoDuplicateValueSeed)?;
623            object.insert(key, value);
624        }
625
626        // If the map is a serde_json internal number representation,
627        // serde_json::from_value will reconstruct the proper Number.
628        // For real JSON objects, this is a no-op identity conversion.
629        serde_json::from_value(Value::Object(object)).map_err(A::Error::custom)
630    }
631}
632
633#[cfg(test)]
634#[path = "lib_tests.rs"]
635mod tests;