use std::io::Write;
use crate::error::AcdpError;
use serde::Serialize;
/// Deepest nesting level `write_value` will serialize. The root value is at
/// depth 0 and every array element or object member is one level deeper than
/// its container; a value deeper than this limit yields a canonicalization
/// error instead of being written.
const MAX_JCS_DEPTH: usize = 256;
/// Serializes any `Serialize` value into canonical JSON bytes.
///
/// The value is first converted into a [`serde_json::Value`] tree, which is
/// then handed to [`try_canonicalize_value`].
///
/// # Errors
///
/// Returns [`AcdpError::Canonicalization`] carrying the serde error text when
/// `value` cannot be converted into a JSON tree, and forwards any error
/// reported by [`try_canonicalize_value`].
pub fn canonicalize<T: Serialize>(value: &T) -> Result<Vec<u8>, AcdpError> {
    serde_json::to_value(value)
        .map_err(|err| AcdpError::Canonicalization(err.to_string()))
        .and_then(|tree| try_canonicalize_value(&tree))
}
/// Writes the canonical form of an already-parsed JSON tree into a new buffer.
///
/// # Errors
///
/// Returns the error produced by `write_value`, which fails when the tree is
/// nested more deeply than `MAX_JCS_DEPTH`.
pub fn try_canonicalize_value(value: &serde_json::Value) -> Result<Vec<u8>, AcdpError> {
    // Start with a small reservation so short documents avoid early regrowth.
    let mut buf = Vec::with_capacity(256);
    // The root value sits at depth 0.
    write_value(value, &mut buf, 0).map(|()| buf)
}
/// Infallible convenience wrapper around [`try_canonicalize_value`].
///
/// # Panics
///
/// Panics when [`try_canonicalize_value`] returns an error. Callers that
/// cannot rule that out should call [`try_canonicalize_value`] directly.
pub fn canonicalize_value(value: &serde_json::Value) -> Vec<u8> {
    let outcome = try_canonicalize_value(value);
    outcome.expect("JCS canonicalization exceeded depth limit; use try_canonicalize_value")
}
/// Recursively appends the canonical serialization of `v` to `out`.
///
/// `depth` is the nesting level of `v` (0 for the root). Array elements and
/// object members are written at `depth + 1`.
///
/// Object members are emitted in ascending order of their keys compared as
/// sequences of UTF-16 code units, as RFC 8785 §3.2.3 requires. Plain
/// byte-wise (`String`) ordering gives a different result when a key character
/// in U+E000..=U+FFFF is compared against a supplementary-plane character,
/// because the latter is encoded as surrogates (0xD800..=0xDFFF) in UTF-16.
///
/// # Errors
///
/// Returns [`AcdpError::Canonicalization`] when `depth` exceeds
/// `MAX_JCS_DEPTH`.
fn write_value(v: &serde_json::Value, out: &mut Vec<u8>, depth: usize) -> Result<(), AcdpError> {
    if depth > MAX_JCS_DEPTH {
        return Err(AcdpError::Canonicalization(format!(
            "JSON nesting depth exceeds {MAX_JCS_DEPTH}"
        )));
    }
    match v {
        serde_json::Value::Null => out.extend_from_slice(b"null"),
        serde_json::Value::Bool(true) => out.extend_from_slice(b"true"),
        serde_json::Value::Bool(false) => out.extend_from_slice(b"false"),
        serde_json::Value::Number(n) => write_number(n, out),
        serde_json::Value::String(s) => write_string(s, out),
        serde_json::Value::Array(arr) => {
            out.push(b'[');
            for (i, elem) in arr.iter().enumerate() {
                if i > 0 {
                    out.push(b',');
                }
                write_value(elem, out, depth + 1)?;
            }
            out.push(b']');
        }
        serde_json::Value::Object(map) => {
            // Collect key/value pairs together so no second map lookup is
            // needed after sorting.
            let mut members: Vec<(&String, &serde_json::Value)> = map.iter().collect();
            // Keys are unique, so an unstable sort cannot reorder equal keys.
            members.sort_unstable_by(|(a, _), (b, _)| a.encode_utf16().cmp(b.encode_utf16()));
            out.push(b'{');
            for (i, (key, member)) in members.into_iter().enumerate() {
                if i > 0 {
                    out.push(b',');
                }
                write_string(key, out);
                out.push(b':');
                write_value(member, out, depth + 1)?;
            }
            out.push(b'}');
        }
    }
    Ok(())
}
/// Appends the canonical text of a JSON number to `out`.
///
/// Numbers that report as `i64` or `u64` are written using the number's own
/// `Display` output, as is any number for which `as_f64` yields nothing.
/// Floating-point zero (either sign) is written as `0`; every other finite
/// float goes through `ecma_number_string`.
///
/// A non-finite float trips a `debug_assert!` in debug builds; in builds
/// without debug assertions it is written as `null`.
fn write_number(n: &serde_json::Number, out: &mut Vec<u8>) {
    // Only consult the float view when the number is not an integer.
    let float_view = if n.is_i64() || n.is_u64() {
        None
    } else {
        n.as_f64()
    };

    match float_view {
        None => out.extend_from_slice(n.to_string().as_bytes()),
        // `-0.0 == 0.0`, so negative zero collapses to "0" here as well.
        Some(f) if f == 0.0 => out.push(b'0'),
        Some(f) => {
            debug_assert!(
                f.is_finite(),
                "non-finite f64 reached JCS canonicalization ({f}); reject \
                 non-finite numbers before hashing (RFC 8785 §3.2.2.3)"
            );
            if f.is_finite() {
                out.extend_from_slice(ecma_number_string(f).as_bytes());
            } else {
                out.extend_from_slice(b"null");
            }
        }
    }
}
fn ecma_number_string(f: f64) -> String {
let neg = f.is_sign_negative();
let sci = format!("{:e}", f.abs());
let (mantissa, exp) = sci.split_once('e').expect("{:e} always emits 'e'");
let e10: i32 = exp.parse().expect("{:e} exponent is an integer");
let digits: String = mantissa.chars().filter(|c| *c != '.').collect();
let digits = digits.trim_end_matches('0');
let digits = if digits.is_empty() { "0" } else { digits };
let k = digits.len() as i32; let n = e10 + 1;
let body = if (k..=21).contains(&n) {
format!("{digits}{}", "0".repeat((n - k) as usize))
} else if (1..=21).contains(&n) {
format!("{}.{}", &digits[..n as usize], &digits[n as usize..])
} else if (-5..=0).contains(&n) {
format!("0.{}{digits}", "0".repeat((-n) as usize))
} else if k == 1 {
format!("{digits}e{}{}", exp_sign(n - 1), (n - 1).abs())
} else {
format!(
"{}.{}e{}{}",
&digits[..1],
&digits[1..],
exp_sign(n - 1),
(n - 1).abs()
)
};
if neg {
format!("-{body}")
} else {
body
}
}
/// Returns the sign character placed before an exponent's magnitude:
/// `'-'` for negative exponents, `'+'` for zero and positive ones.
fn exp_sign(e: i32) -> char {
    if e < 0 {
        '-'
    } else {
        '+'
    }
}
/// Appends `s` to `out` as a double-quoted JSON string literal.
///
/// Escaping follows RFC 8785 §3.2.2.2:
///
/// * `"` and `\` are written as `\"` and `\\`;
/// * U+0008, U+0009, U+000A, U+000C and U+000D use the short forms
///   `\b`, `\t`, `\n`, `\f` and `\r`;
/// * every other character below U+0020 is written as `\u00xx` with
///   lowercase hex digits;
/// * all remaining characters are copied through as UTF-8, unescaped.
fn write_string(s: &str, out: &mut Vec<u8>) {
    out.push(b'"');
    for ch in s.chars() {
        match ch {
            '"' => out.extend_from_slice(b"\\\""),
            '\\' => out.extend_from_slice(b"\\\\"),
            // Backspace and form feed have mandatory two-character escapes;
            // emitting them as \u0008 / \u000c would not be canonical.
            '\u{08}' => out.extend_from_slice(b"\\b"),
            '\t' => out.extend_from_slice(b"\\t"),
            '\n' => out.extend_from_slice(b"\\n"),
            '\u{0c}' => out.extend_from_slice(b"\\f"),
            '\r' => out.extend_from_slice(b"\\r"),
            c if (c as u32) < 0x20 => {
                write!(out, "\\u{:04x}", c as u32).expect("writing to a Vec<u8> cannot fail");
            }
            c => {
                let mut buf = [0u8; 4];
                out.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
            }
        }
    }
    out.push(b'"');
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Canonicalizes `value` and returns the output as UTF-8 text.
    fn canon_text(value: &serde_json::Value) -> String {
        String::from_utf8(canonicalize_value(value)).unwrap()
    }

    /// Parses one JSON token and returns its canonical text.
    fn canon_number(json_token: &str) -> String {
        let parsed: serde_json::Value = serde_json::from_str(json_token).unwrap();
        canon_text(&parsed)
    }

    /// Hex-encoded SHA-256 digest of the canonical bytes of `value`.
    fn canon_sha256(value: &serde_json::Value) -> String {
        use sha2::{Digest, Sha256};
        hex::encode(Sha256::digest(canonicalize_value(value)))
    }

    #[test]
    fn sorts_keys() {
        let unordered = json!({"z": 1, "a": 2, "m": 3});
        assert_eq!(canonicalize_value(&unordered), b"{\"a\":2,\"m\":3,\"z\":1}");
    }

    #[test]
    fn negative_zero_becomes_zero() {
        let mixed = json!({"values": [42, -7, 0, 1.1, 1.5, -0.0_f64]});
        let s = canon_text(&mixed);
        assert!(!s.contains("-0"), "found '-0' in: {s}");
    }

    #[test]
    fn unicode_as_is() {
        let doc = json!({"title": "café"});
        assert_eq!(canonicalize_value(&doc), "{\"title\":\"café\"}".as_bytes());
    }

    #[test]
    fn empty_vs_absent() {
        let with_tags = canon_sha256(&json!({"tags": [], "v": 1}));
        let without = canon_sha256(&json!({"v": 1}));
        assert_ne!(
            with_tags, without,
            "empty array and absent field must hash differently"
        );
    }

    #[test]
    fn minimal_body_golden_hash() {
        let body = json!({
            "agent_id": "did:agent:test",
            "contributors": [],
            "data_refs": [],
            "supersedes": null,
            "title": "Minimal",
            "type": "data_snapshot",
            "version": 1
        });
        assert_eq!(
            canon_sha256(&body),
            "5f8d88d6758cfd43be875d49edc9eaa494de8ec645bf7de6c592b15bbb1e2e3c"
        );
    }

    #[test]
    fn rfc8785_integer_vectors() {
        let vectors = [
            ("0", "0"),
            ("-0", "0"),
            ("1", "1"),
            ("-1", "-1"),
            ("100", "100"),
            ("9007199254740992", "9007199254740992"),
            ("9007199254740993", "9007199254740993"),
            ("18446744073709551615", "18446744073709551615"),
            ("-9223372036854775808", "-9223372036854775808"),
        ];
        for (input, expected) in vectors {
            assert_eq!(canon_number(input), expected, "input={input}");
        }
    }

    #[test]
    fn rfc8785_negative_zero_float_becomes_zero() {
        assert_eq!(canon_number("-0.0"), "0");

        let nested = json!({"a": [-0.0_f64, 1], "b": -0.0_f64});
        assert_eq!(canon_text(&nested), r#"{"a":[0,1],"b":0}"#);
    }

    #[test]
    fn rfc8785_plain_decimal_vectors() {
        let vectors = [
            ("0.1", "0.1"),
            ("1.5", "1.5"),
            ("-2.5", "-2.5"),
            ("123.456", "123.456"),
        ];
        for (input, expected) in vectors {
            assert_eq!(canon_number(input), expected, "input={input}");
        }
    }

    #[test]
    fn rfc8785_numeric_serialization_is_idempotent() {
        let tokens = ["0", "-0", "42", "9007199254740993", "0.1", "-2.5", "-0.0"];
        for token in tokens {
            // Canonicalizing canonical output must be a no-op.
            let once = canon_number(token);
            let twice = canon_number(&once);
            assert_eq!(once, twice, "token={token}");
        }
    }

    #[test]
    fn rfc8785_ecmascript_float_bands() {
        let vectors = [
            // Exponent notation for large magnitudes.
            ("1e21", "1e+21"),
            ("1e22", "1e+22"),
            ("1.23e25", "1.23e+25"),
            ("1e100", "1e+100"),
            // Exponent notation for small magnitudes.
            ("1e-7", "1e-7"),
            ("1e-10", "1e-10"),
            ("5e-9", "5e-9"),
            ("1e-20", "1e-20"),
            // Plain decimal notation.
            ("1e-6", "0.000001"),
            ("0.1", "0.1"),
            ("1000000.5", "1000000.5"),
            ("12345.6789", "12345.6789"),
            ("1.0", "1"),
            ("100.0", "100"),
            // Extremes of the f64 range.
            ("1.7976931348623157e308", "1.7976931348623157e+308"),
            ("5e-324", "5e-324"),
        ];
        for (token, expected) in vectors {
            assert_eq!(canon_number(token), expected, "token={token}");
        }
    }

    #[test]
    fn rfc8785_all_zeros_normalize() {
        let zero_spellings = ["0", "-0", "0.0", "-0.0", "0e0", "-0.0e10"];
        for token in zero_spellings {
            assert_eq!(canon_number(token), "0", "token={token}");
        }
    }
}