Skip to main content

aion_context/
jcs.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2//! RFC 8785 JSON Canonicalization Scheme — RFC-0031.
3//!
4//! Thin wrapper around [`serde_jcs`] with aion-typed errors. Use
5//! this module when you need JSON bytes that are identical
6//! across Rust, Go, Python, JavaScript, and any other JCS-conformant
7//! implementation for the same logical document.
8//!
9//! This module is **additive**. Existing `canonical_bytes()` methods
10//! on [`crate::slsa::InTotoStatement`], [`crate::aibom::AiBom`], and
11//! [`crate::oci::OciArtifactManifest`] keep their current
12//! (serde-declaration-order) semantics so historical DSSE-signed
13//! envelopes continue to verify. Reach for JCS at new call sites —
14//! content-addressed catalogs, transparency-log entries, multi-
15//! implementation reproducibility audits.
16//!
17//! # Example
18//!
19//! ```
20//! use aion_context::jcs;
21//! use serde_json::json;
22//!
23//! let v = json!({ "b": 1, "a": 2 });
24//! let bytes = jcs::to_jcs_bytes(&v).unwrap();
25//! // Keys emerge in lexicographic UTF-16 order.
26//! assert_eq!(bytes, b"{\"a\":2,\"b\":1}");
27//! ```
28
29use serde::Serialize;
30
31use crate::{AionError, Result};
32
33/// Serialize any `serde::Serialize` value to RFC 8785 canonical
34/// JSON bytes.
35///
36/// # Errors
37///
38/// Returns `Err` if the value fails to serialize via serde.
39pub fn to_jcs_bytes<T: Serialize>(value: &T) -> Result<Vec<u8>> {
40    serde_jcs::to_vec(value).map_err(|e| AionError::InvalidFormat {
41        reason: format!("JCS serialization failed: {e}"),
42    })
43}
44
45/// Canonicalize existing JSON bytes.
46///
47/// Parses `bytes` into a `serde_json::Value` and re-emits it in
48/// RFC 8785 canonical form. Safe to feed any UTF-8 JSON document.
49///
50/// # Errors
51///
52/// Returns `Err` if the input is not valid JSON or the canonical
53/// re-emission fails.
54pub fn canonicalize_json_bytes(bytes: &[u8]) -> Result<Vec<u8>> {
55    let value: serde_json::Value =
56        serde_json::from_slice(bytes).map_err(|e| AionError::InvalidFormat {
57            reason: format!("JCS input is not valid JSON: {e}"),
58        })?;
59    to_jcs_bytes(&value)
60}
61
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::indexing_slicing)]
mod tests {
    use super::*;
    use serde_json::json;

    /// An object with keys `c`, `a`, `b` serializes with keys `a`, `b`, `c`.
    #[test]
    fn keys_are_sorted_lex() {
        let v = json!({ "c": 3, "a": 1, "b": 2 });
        let bytes = to_jcs_bytes(&v).unwrap();
        assert_eq!(bytes, br#"{"a":1,"b":2,"c":3}"#);
    }

    /// Array elements are emitted in their original order, not sorted.
    #[test]
    fn arrays_preserve_order() {
        let v = json!([3, 1, 2]);
        let bytes = to_jcs_bytes(&v).unwrap();
        assert_eq!(bytes, b"[3,1,2]");
    }

    /// Empty containers serialize to their two-byte forms.
    #[test]
    fn empty_object_and_array() {
        assert_eq!(to_jcs_bytes(&json!({})).unwrap(), b"{}");
        assert_eq!(to_jcs_bytes(&json!([])).unwrap(), b"[]");
    }

    /// Raw JSON bytes with out-of-order keys come back with sorted keys.
    #[test]
    fn canonicalize_bytes_round_trip_reorders_keys() {
        let raw = br#"{"z":1,"a":2}"#;
        let canonical = canonicalize_json_bytes(raw).unwrap();
        assert_eq!(canonical, br#"{"a":2,"z":1}"#);
    }

    /// Malformed input is reported as an error rather than a panic.
    #[test]
    fn canonicalize_rejects_invalid_json() {
        assert!(canonicalize_json_bytes(b"{not json").is_err());
    }

    /// Canonicalizing already-canonical bytes leaves them unchanged.
    #[test]
    fn idempotent_on_already_canonical_json() {
        let v = json!({"a": 1, "b": [true, false, null]});
        let once = to_jcs_bytes(&v).unwrap();
        let twice = canonicalize_json_bytes(&once).unwrap();
        assert_eq!(once, twice);
    }

    /// Property-based tests driven by `hegel` generators.
    ///
    /// Failures are surfaced with `unwrap()` (permitted by the
    /// `clippy::unwrap_used` allow on the enclosing module) so that a
    /// failing case panics with the error text instead of terminating
    /// the whole test process.
    mod properties {
        use super::*;
        use hegel::generators as gs;

        /// Largest integer safely representable in an ECMAScript
        /// `Number` (`2^53 - 1`). JCS formats numbers through
        /// ECMAScript rules, so integers outside this range do not
        /// survive a JCS → parse round-trip as the same `i64` value.
        const JS_MAX_SAFE_INTEGER: i64 = (1_i64 << 53) - 1;
        const JS_MIN_SAFE_INTEGER: i64 = -JS_MAX_SAFE_INTEGER;

        /// Draw a JSON object whose entries are keyed `k_0`, `k_1`, …
        /// and whose values are null, a bool, a short string, or an
        /// integer within the JS safe-integer range.
        ///
        /// The entry count is drawn with `max_value(6)`.
        fn draw_value(tc: &hegel::TestCase) -> serde_json::Value {
            let n = tc.draw(gs::integers::<usize>().max_value(6));
            let mut map = serde_json::Map::new();
            for index in 0..n {
                // `pick` selects which kind of primitive this entry holds.
                let pick = tc.draw(gs::integers::<u8>().max_value(3));
                let value = match pick {
                    0 => serde_json::Value::Null,
                    1 => serde_json::Value::Bool(tc.draw(gs::booleans())),
                    2 => serde_json::Value::String(tc.draw(gs::text().max_size(16))),
                    _ => serde_json::Value::from(
                        tc.draw(
                            gs::integers::<i64>()
                                .min_value(JS_MIN_SAFE_INTEGER)
                                .max_value(JS_MAX_SAFE_INTEGER),
                        ),
                    ),
                };
                map.insert(format!("k_{index}"), value);
            }
            serde_json::Value::Object(map)
        }

        /// Canonicalizing canonical output yields the same bytes.
        #[hegel::test]
        fn prop_jcs_idempotent(tc: hegel::TestCase) {
            let value = draw_value(&tc);
            let once = to_jcs_bytes(&value).unwrap();
            let twice = canonicalize_json_bytes(&once).unwrap();
            assert_eq!(once, twice);
        }

        /// Top-level keys of the re-parsed output are in ascending order.
        ///
        /// NOTE(review): this inspects the key order of the *parsed*
        /// `serde_json::Map`, not of the emitted bytes. If the map is
        /// backed by a sorted container (presumably the default when
        /// serde_json's `preserve_order` feature is off — confirm), the
        /// assertion holds regardless of what was serialized.
        #[hegel::test]
        fn prop_jcs_keys_sorted(tc: hegel::TestCase) {
            let value = draw_value(&tc);
            let bytes = to_jcs_bytes(&value).unwrap();
            // Parse back and verify top-level keys in lex order.
            let parsed: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
            // `draw_value` always builds an object, so anything else is a failure.
            let map = parsed.as_object().unwrap();
            let keys: Vec<&String> = map.keys().collect();
            for window in keys.windows(2) {
                assert!(window[0] <= window[1]);
            }
        }

        /// No space, tab, LF, or CR byte appears outside string literals.
        #[hegel::test]
        fn prop_jcs_no_whitespace_between_tokens(tc: hegel::TestCase) {
            // Build a nested object to expose any inter-token whitespace.
            let value = serde_json::json!({
                "outer": draw_value(&tc),
                "array": [1, "two", true, null],
            });
            let bytes = to_jcs_bytes(&value).unwrap();
            // Track whether we're inside a string, and whether the
            // previous in-string byte was an unconsumed backslash.
            let mut in_string = false;
            let mut escaped = false;
            for (offset, &byte) in bytes.iter().enumerate() {
                if in_string {
                    if escaped {
                        // This byte is the target of an escape; skip it.
                        escaped = false;
                    } else if byte == b'\\' {
                        escaped = true;
                    } else if byte == b'"' {
                        in_string = false;
                    }
                    continue;
                }
                if byte == b'"' {
                    in_string = true;
                    continue;
                }
                // Outside a string, RFC 8785 forbids whitespace.
                assert!(
                    !matches!(byte, b' ' | b'\t' | b'\n' | b'\r'),
                    "found whitespace byte {byte:#04x} outside string at offset {offset}"
                );
            }
        }

        /// Parsing the canonical bytes gives back an equal `Value`.
        #[hegel::test]
        fn prop_jcs_parse_roundtrip_semantic(tc: hegel::TestCase) {
            let value = draw_value(&tc);
            let bytes = to_jcs_bytes(&value).unwrap();
            let parsed: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
            assert_eq!(parsed, value);
        }

        /// The same key/value pairs inserted in opposite orders must
        /// serialize to identical bytes.
        ///
        /// NOTE(review): if `serde_json::Map` does not preserve
        /// insertion order in this build, `forward` and `reverse` are
        /// already identical before serialization — confirm the
        /// `preserve_order` feature setting for this test to be
        /// meaningful.
        #[hegel::test]
        fn prop_jcs_reordering_input_preserves_output(tc: hegel::TestCase) {
            // Same Value built two different ways:
            //   forward:  inserted in lex order
            //   reverse:  inserted in reverse lex order
            // JCS must produce the same bytes.
            let n = tc.draw(gs::integers::<usize>().min_value(1).max_value(6));
            let pairs: Vec<(String, i64)> = (0..n)
                .map(|i| {
                    (
                        format!("k_{i:02}"),
                        tc.draw(
                            gs::integers::<i64>()
                                .min_value(JS_MIN_SAFE_INTEGER)
                                .max_value(JS_MAX_SAFE_INTEGER),
                        ),
                    )
                })
                .collect();

            let mut forward = serde_json::Map::new();
            for (k, v) in &pairs {
                forward.insert(k.clone(), serde_json::Value::from(*v));
            }
            let mut reverse = serde_json::Map::new();
            for (k, v) in pairs.iter().rev() {
                reverse.insert(k.clone(), serde_json::Value::from(*v));
            }
            let a = to_jcs_bytes(&serde_json::Value::Object(forward)).unwrap();
            let b = to_jcs_bytes(&serde_json::Value::Object(reverse)).unwrap();
            assert_eq!(a, b);
        }
    }
}