vr_jcs/lib.rs
1//! # vr-jcs
2//!
3//! RFC 8785 JSON Canonicalization Scheme (JCS) for Rust.
4//!
5//! Produces canonical JSON suitable for deterministic digest computation,
6//! content hashing, and stable serialization boundaries. Implements the
7//! RFC 8785 rules that materially affect wire compatibility:
8//! - UTF-16 code-unit sorting for object property names
9//! - ECMAScript-compatible primitive serialization
10//! - UTF-8 output without insignificant whitespace
11//! - duplicate-property rejection on raw JSON parse paths
12//! - I-JSON string / number validation
13//!
14//! ## Module layout
15//!
//! - [`mod@canonicalize`] — RFC 8785 emit + in-place key sorting (also
//!   re-exports the public [`canonicalize()`](fn@crate::canonicalize) function).
18//! - [`strict_parse`] — strict admission parser for untrusted JSON
19//! (duplicate-key rejection, I-JSON validation, depth limit).
20//! - `digest` — strategy-bearing canonical-bytes digest API.
21//! - `canonical_bytes` — [`CanonicalBytes`] newtype boundary.
22//! - `number` — internal ECMAScript number rendering (RFC 8785 §3.2.6).
23//! - [`error`](crate) — [`JcsError`] / [`JcsErrorInfo`].
24//!
25//! ## API
26//!
27//! ### Strict path (for untrusted JSON)
28//!
29//! - [`to_canon_bytes_from_slice`] — Parse untrusted JSON, apply strict admission checks, emit canonical bytes
30//! - [`to_canon_string_from_str`] — Parse untrusted JSON string, apply strict admission checks, emit canonical string
31//!
32//! ### Typed path (caller-controlled construction only, deprecated)
33//!
34//! - [`to_canon_bytes`] — Serialize any `Serialize` type to canonical JSON bytes
35//! - [`to_canon_string`] — Serialize any `Serialize` type to a canonical JSON string
36//!
37//! ### In-place
38//!
//! - [`canonicalize()`](fn@crate::canonicalize) — Sort object keys recursively in a `serde_json::Value`
//!   and validate I-JSON strings + numbers.
41//!
42//! ### Canonical digest
43//!
44//! Canonicalization is a schema decision; digest algorithm choice (BLAKE3 vs.
45//! keyed BLAKE3 vs. domain-separated BLAKE3 vs. SHA-256 vs. …) is a separate
46//! cryptographic / governance decision. The digest surface reflects that split:
47//!
48//! **Strategy-bearing (primary) path** — for any site whose digest algorithm
49//! is or may become a policy variable:
50//!
51//! - [`DigestAlgorithm`] — the algorithm enum.
52//! - [`DigestStrategy`] — an algorithm plus any future policy knobs.
53//! - [`CanonicalDigest`] — typed output that remembers which algorithm produced it.
54//! - [`to_canon_digest_with`] — canonicalize `value`, digest under `strategy`.
55//!
56//! **BLAKE3 fixed-policy convenience** — for sites where receipt policy has
57//! explicitly frozen the algorithm to plain BLAKE3:
58//!
59//! - [`to_canon_blake3_digest`] — `&Value` → `[u8; 32]`.
60//! - [`to_canon_blake3_digest_from_slice`] — strict-parse `&[u8]` → `[u8; 32]`.
61//!
62//! These convenience wrappers are equivalent to calling
63//! [`to_canon_digest_with`] with [`DigestStrategy::blake3_untagged`] and
64//! extracting `bytes`.
65//!
66//! The lexical invariant is: canonicalization and digest must travel together
67//! through one call. Receipt-bound and constitutional code paths MUST use the
68//! strategy-bearing or fixed-BLAKE3 wrappers instead of pairing
69//! `to_canon_bytes_*` with `blake3::hash` manually.
70//!
71//! ## Usage
72//!
73//! ```
74//! # fn main() -> Result<(), vr_jcs::JcsError> {
75//! let json = vr_jcs::to_canon_string_from_str(r#"{"z_field":1,"a_field":2}"#)?;
76//! assert_eq!(json, r#"{"a_field":2,"z_field":1}"#);
77//! # Ok(())
78//! # }
79//! ```
80
81use serde::Serialize;
82
83/// Maximum permitted nesting depth for JSON structures (128).
84pub const MAX_NESTING_DEPTH: usize = 128;
85
86mod canonical_bytes;
87pub mod canonicalize;
88mod digest;
89mod error;
90mod number;
91pub mod strict_parse;
92
93pub use canonical_bytes::CanonicalBytes;
94pub use canonicalize::canonicalize;
95pub use digest::{
96 to_canon_blake3_digest, to_canon_blake3_digest_from_slice, to_canon_digest_with,
97 CanonicalDigest, DigestAlgorithm, DigestStrategy,
98};
99pub use error::{JcsError, JcsErrorInfo};
100
101// Backward-compatible top-level re-exports of strict_parse helpers.
102// `vertrule-schemas` and any other sibling-crate consumer reaches these
103// through `vr_jcs::*`; the module path `vr_jcs::strict_parse::*` is the
104// preferred location for new code.
105#[doc(hidden)]
106pub use strict_parse::{
107 deserialize_json_value_no_duplicates, is_safe_integer, validate_string_contents,
108};
109
110// Crate-private re-export so `lib_tests.rs` (an internal test module
111// brought in via `#[path = "lib_tests.rs"] mod tests`) can keep its
112// `super::*` import shape after the canonicalize-module split.
113#[cfg(test)]
114pub(crate) use canonicalize::to_canon_bytes_value;
115
116// ── Public API ─────────────────────────────────────────────────────
117
118/// Serialize any `Serialize` type to canonical JSON bytes.
119///
120/// The typed `Serialize` path is not authoritative for untrusted raw JSON
121/// because it does not control parse-time object-member admission. For
122/// untrusted input, use [`to_canon_bytes_from_slice`] instead.
123///
124/// # Errors
125///
126/// Returns:
127/// - [`JcsError::Json`] if serialization to JSON fails
128/// - [`JcsError::InvalidString`] if a string contains an I-JSON forbidden code point
129/// - [`JcsError::InvalidNumber`] if a number is not interoperable under JCS
130/// - [`JcsError::NestingDepthExceeded`] if the value exceeds [`MAX_NESTING_DEPTH`]
131#[deprecated(
132 since = "0.3.0",
133 note = "use to_canon_bytes_from_slice for untrusted input; see PUBLIC_SURFACE.md"
134)]
135pub fn to_canon_bytes<T: Serialize>(value: &T) -> Result<Vec<u8>, JcsError> {
136 let value = serde_json::to_value(value)?;
137 canonicalize::to_canon_bytes_value(&value)
138}
139
140/// Serialize any `Serialize` type to a canonical JSON string.
141///
142/// # Errors
143///
144/// Returns:
145/// - [`JcsError::Json`] if serialization to JSON fails
146/// - [`JcsError::InvalidString`] if a string contains an I-JSON forbidden code point
147/// - [`JcsError::InvalidNumber`] if a number is not interoperable under JCS
148/// - [`JcsError::NestingDepthExceeded`] if the value exceeds [`MAX_NESTING_DEPTH`]
149#[deprecated(
150 since = "0.3.0",
151 note = "use to_canon_string_from_str for untrusted input; see PUBLIC_SURFACE.md"
152)]
153pub fn to_canon_string<T: Serialize>(value: &T) -> Result<String, JcsError> {
154 let value = serde_json::to_value(value)?;
155 let bytes = canonicalize::to_canon_bytes_value(&value)?;
156 String::from_utf8(bytes).map_err(|error| {
157 JcsError::InvalidString(format!(
158 "canonical JSON output was not valid UTF-8: {error}"
159 ))
160 })
161}
162
163/// Parse untrusted JSON, apply strict admission checks, and emit canonical
164/// RFC 8785 bytes.
165///
166/// Rejects duplicate property names, validates I-JSON string and number
167/// constraints, and enforces [`MAX_NESTING_DEPTH`]. Accepts any valid JSON
168/// formatting (including pretty-printed input) and canonicalizes it.
169///
170/// # Errors
171///
172/// Returns [`JcsError::Json`] for malformed JSON or duplicate property names,
173/// [`JcsError::InvalidString`] or [`JcsError::InvalidNumber`] for I-JSON
174/// violations, and [`JcsError::NestingDepthExceeded`] for depth limit breach.
175pub fn to_canon_bytes_from_slice(json: &[u8]) -> Result<Vec<u8>, JcsError> {
176 let value = strict_parse::parse_json_value_no_duplicates(json)?;
177 canonicalize::to_canon_bytes_value(&value)
178}
179
180/// Parse untrusted JSON text, apply strict admission checks, and emit a
181/// canonical RFC 8785 string.
182///
183/// # Errors
184///
185/// Returns the same errors as [`to_canon_bytes_from_slice`].
186pub fn to_canon_string_from_str(json: &str) -> Result<String, JcsError> {
187 let bytes = to_canon_bytes_from_slice(json.as_bytes())?;
188 String::from_utf8(bytes).map_err(|error| {
189 JcsError::InvalidString(format!(
190 "canonical JSON output was not valid UTF-8: {error}"
191 ))
192 })
193}
194
195/// Parse untrusted JSON, apply strict admission checks, and return the
196/// canonical RFC 8785 bytes inside a [`CanonicalBytes`] wrapper.
197///
198/// Prefer this over [`to_canon_bytes_from_slice`] for any path that will
199/// feed the bytes into a digest, signature, or receipt primitive — the
200/// wrapper makes "came out of JCS" a type-level fact.
201///
202/// # Errors
203///
204/// Returns the same errors as [`to_canon_bytes_from_slice`].
205pub fn canonical_bytes_from_slice(json: &[u8]) -> Result<CanonicalBytes, JcsError> {
206 to_canon_bytes_from_slice(json).map(CanonicalBytes::from_jcs)
207}
208
209#[cfg(test)]
210#[path = "lib_tests.rs"]
211mod tests;