use serde_json::Value;
use sha2::{Digest, Sha256};
/// Number of leading hexadecimal characters of the SHA-256 digest that
/// `content_hash` keeps (32 hex characters, i.e. half of the 64-character digest).
pub const HASH_WIDTH: usize = 32;
/// Largest numeric value `is_array_index` accepts for an object key: 2^32 - 2.
/// This is the upper bound ECMAScript places on an "array index" property key.
const MAX_ARRAY_INDEX: u64 = (1u64 << 32) - 2;
/// Renders `value` as compact JSON text (no whitespace between tokens).
///
/// All serialization is delegated to `write_value`, which decides key order,
/// string escaping and number formatting. The result is returned as a fresh
/// `String`.
pub fn canonical_json(value: &Value) -> String {
    let mut rendered = String::new();
    write_value(value, &mut rendered);
    rendered
}
/// Computes a short content identifier for `value`.
///
/// The value is serialized with [`canonical_json`], the UTF-8 bytes of that
/// text are hashed with SHA-256, and the lowercase hex digest is cut down to
/// its first [`HASH_WIDTH`] characters.
pub fn content_hash(value: &Value) -> String {
    let canonical = canonical_json(value);
    let mut hex_digest = hex::encode(Sha256::digest(canonical.as_bytes()));
    // A SHA-256 hex digest is 64 ASCII characters, so cutting at HASH_WIDTH
    // always lands on a character boundary.
    hex_digest.truncate(HASH_WIDTH);
    hex_digest
}
/// Appends the compact JSON rendering of `value` to `out`.
///
/// Scalars are written directly or via `write_number` / `write_string`.
/// Arrays and objects recurse, separating entries with a bare `,` and keys
/// from values with a bare `:`. Object members are visited in the order
/// returned by `js_key_order`.
fn write_value(value: &Value, out: &mut String) {
    match value {
        Value::Null => out.push_str("null"),
        Value::Bool(true) => out.push_str("true"),
        Value::Bool(false) => out.push_str("false"),
        Value::Number(number) => write_number(number, out),
        Value::String(text) => write_string(text, out),
        Value::Array(elements) => {
            out.push('[');
            let mut remaining = elements.iter();
            // Write the first element bare, then prefix every later one with a comma.
            if let Some(first) = remaining.next() {
                write_value(first, out);
                for element in remaining {
                    out.push(',');
                    write_value(element, out);
                }
            }
            out.push(']');
        }
        Value::Object(members) => {
            out.push('{');
            let mut needs_comma = false;
            for key in js_key_order(members) {
                if needs_comma {
                    out.push(',');
                }
                needs_comma = true;
                write_string(key, out);
                out.push(':');
                write_value(&members[key], out);
            }
            out.push('}');
        }
    }
}
/// Returns the keys of `map` in the order they should be serialized.
///
/// Keys that `is_array_index` accepts come first, sorted by ascending numeric
/// value; every other key follows in the order `map.keys()` yields it. When
/// no key is an array index the result is simply `map.keys()` in order.
fn js_key_order(map: &serde_json::Map<String, Value>) -> Vec<&String> {
    let (mut ordered, named): (Vec<&String>, Vec<&String>) =
        map.keys().partition(|key| is_array_index(key));
    // Every key in `ordered` already passed `is_array_index`, so the parse
    // succeeds; the fallback of 0 only keeps the closure total.
    ordered.sort_by_key(|key| key.parse::<u64>().unwrap_or(0));
    ordered.extend(named);
    ordered
}
/// Reports whether `key` is a canonical array-index string.
///
/// A key qualifies when it is non-empty, consists only of ASCII digits, has
/// no leading zero (the single character `"0"` is allowed), and its numeric
/// value does not exceed `MAX_ARRAY_INDEX`. Digit strings too long to fit in
/// a `u64` are rejected.
fn is_array_index(key: &str) -> bool {
    let bytes = key.as_bytes();
    // The explicit digit check matters: `u64::from_str` would accept "+1".
    let all_digits = !bytes.is_empty() && bytes.iter().all(u8::is_ascii_digit);
    let leading_zero = bytes.len() > 1 && bytes[0] == b'0';
    all_digits
        && !leading_zero
        && matches!(key.parse::<u64>(), Ok(index) if index <= MAX_ARRAY_INDEX)
}
/// Appends `s` to `out` as a double-quoted JSON string literal.
///
/// `"` and `\` are backslash-escaped; backspace, form feed, newline, carriage
/// return and tab use their two-character escapes; any other character below
/// U+0020 becomes `\u00xx` with lowercase hex digits. Everything else,
/// including non-ASCII text, is copied through unchanged.
fn write_string(s: &str, out: &mut String) {
    out.push('"');
    for ch in s.chars() {
        let short_escape = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\u{08}' => Some("\\b"),
            '\u{0c}' => Some("\\f"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(escape) = short_escape {
            out.push_str(escape);
        } else if u32::from(ch) < 0x20 {
            // Remaining control characters have no short form.
            out.push_str(&format!("\\u{:04x}", u32::from(ch)));
        } else {
            out.push(ch);
        }
    }
    out.push('"');
}
fn write_number(n: &serde_json::Number, out: &mut String) {
if let Some(i) = n.as_i64() {
out.push_str(&i.to_string());
return;
}
if let Some(u) = n.as_u64() {
out.push_str(&u.to_string());
return;
}
match n.as_f64() {
Some(f) => out.push_str(&js_number(f)),
None => out.push_str("null"),
}
}
/// Formats a float using the case analysis of ECMAScript's number-to-string
/// conversion.
///
/// * NaN and the infinities render as `null`; both zeros render as `0`.
/// * Otherwise `decompose` supplies the significant digits (`k` of them) and
///   the decimal-point position `n`, and the layout is chosen from them:
///   - `k <= n <= 21`: the digits padded with zeros, no fraction;
///   - `0 < n <= 21`: a decimal point placed inside the digits;
///   - `-6 < n <= 0`: `0.` followed by `-n` zeros and the digits;
///   - anything else: exponent notation with an explicit `e+` / `e-`.
fn js_number(value: f64) -> String {
    if !value.is_finite() {
        return "null".to_string();
    }
    if value == 0.0 {
        return "0".to_string();
    }

    let (digits, n) = decompose(value.abs());
    let k = digits.len() as i64;

    let mut text = String::new();
    if value < 0.0 {
        text.push('-');
    }

    if k <= n && n <= 21 {
        // Integer that fits without an exponent: pad up to the point.
        text.push_str(&digits);
        text.push_str(&"0".repeat((n - k) as usize));
    } else if 0 < n && n <= 21 {
        // Here n < k, so the split index is inside the digit string.
        let (whole, fraction) = digits.split_at(n as usize);
        text.push_str(whole);
        text.push('.');
        text.push_str(fraction);
    } else if -6 < n && n <= 0 {
        text.push_str("0.");
        text.push_str(&"0".repeat((-n) as usize));
        text.push_str(&digits);
    } else {
        if k > 1 {
            text.push_str(&digits[..1]);
            text.push('.');
            text.push_str(&digits[1..]);
        } else {
            text.push_str(&digits);
        }
        let exponent = n - 1;
        text.push_str(if exponent >= 0 { "e+" } else { "e-" });
        text.push_str(&exponent.abs().to_string());
    }
    text
}
/// Splits `magnitude` into its significant decimal digits and the position
/// of the decimal point.
///
/// The value is formatted with `{:e}` and the mantissa's digits are joined
/// with trailing zeros removed (at least one digit is always kept). The
/// returned `n` places the decimal point: the value equals
/// `0.<digits> * 10^n`.
///
/// # Panics
///
/// Panics if the `{:e}` rendering has no `e` or a non-integer exponent; the
/// caller is expected to pass a finite value.
fn decompose(magnitude: f64) -> (String, i64) {
    let scientific = format!("{magnitude:e}");
    let (mantissa, exponent_text) = scientific
        .split_once('e')
        .expect("{:e} always has an 'e'");
    let exponent: i64 = exponent_text.parse().expect("exponent is an integer");
    let (whole, fraction) = mantissa.split_once('.').unwrap_or((mantissa, ""));

    let mut digits = [whole, fraction].concat();
    while digits.len() > 1 && digits.ends_with('0') {
        digits.pop();
    }

    // The point sits `whole.len()` digits into the mantissa, shifted by the
    // exponent; dropping trailing zeros does not move it.
    let n = exponent + whole.len() as i64;
    (digits, n)
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Expected `content_hash` of `golden_value()`.
    const GOLDEN_HASH: &str = "49a843a2b9dfc7f04a4fe5be52b90bfa";

    /// Expected `canonical_json` rendering of `golden_value()`.
    const GOLDEN_PAYLOAD: &str = "{\"parent\":\"abc123\",\"turn\":{\"role\":\"assistant\",\"blocks\":[{\"kind\":\"text\",\"text\":\"Hello 世界 😀\"},{\"kind\":\"tool_call\",\"id\":\"call_0\",\"name\":\"bash\",\"input\":{\"cmd\":\"ls\",\"n\":42,\"ratio\":0.5,\"big\":1e+21,\"tiny\":1e-7,\"neg\":-3.25}}]},\"createdAt\":1718000000000}";

    /// Builds the fixture that the golden payload and hash were recorded for.
    fn golden_value() -> Value {
        json!({
            "parent": "abc123",
            "turn": {
                "role": "assistant",
                "blocks": [
                    { "kind": "text", "text": "Hello 世界 😀" },
                    {
                        "kind": "tool_call",
                        "id": "call_0",
                        "name": "bash",
                        "input": {
                            "cmd": "ls",
                            "n": 42,
                            "ratio": 0.5,
                            "big": 1e21,
                            "tiny": 1e-7,
                            "neg": -3.25
                        }
                    }
                ]
            },
            "createdAt": 1718000000000i64
        })
    }

    /// Checks that every value serializes to the text paired with it.
    fn assert_renders(cases: &[(Value, &str)]) {
        for (input, expected) in cases {
            assert_eq!(canonical_json(input), *expected);
        }
    }

    /// Checks that every float formats to the text paired with it.
    fn assert_numbers(cases: &[(f64, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(js_number(input), expected, "js_number({input:?})");
        }
    }

    #[test]
    fn primitives_match_json_stringify() {
        assert_renders(&[
            (json!(null), "null"),
            (json!(true), "true"),
            (json!(false), "false"),
            (json!("hi"), "\"hi\""),
            (json!(42), "42"),
            (json!(-7), "-7"),
        ]);
    }

    #[test]
    fn objects_have_no_whitespace_and_keep_insertion_order() {
        let object = json!({ "b": 1, "a": 2, "c": 3 });
        assert_eq!(canonical_json(&object), r#"{"b":1,"a":2,"c":3}"#);
    }

    #[test]
    fn arrays_have_compact_separators() {
        assert_renders(&[(json!([1, 2, 3]), "[1,2,3]"), (json!([]), "[]")]);
    }

    #[test]
    fn numeric_keys_sort_ascending_before_string_keys() {
        let object = json!({ "10": 0, "z": 0, "2": 0, "a": 0 });
        assert_eq!(canonical_json(&object), r#"{"2":0,"10":0,"z":0,"a":0}"#);
    }

    #[test]
    fn non_canonical_numeric_keys_keep_insertion_order() {
        // "01" has a leading zero, so it is ordered with the ordinary string
        // keys, after the array-index key "1".
        let object = json!({ "01": 0, "1": 0 });
        assert_eq!(canonical_json(&object), r#"{"1":0,"01":0}"#);
    }

    #[test]
    fn string_escaping_matches_json_stringify() {
        assert_renders(&[
            (json!("a\"b\\c"), r#""a\"b\\c""#),
            (json!("tab\there"), r#""tab\there""#),
            (json!("line\nbreak"), r#""line\nbreak""#),
            (json!("\u{01}"), "\"\\u0001\""),
            (json!("世界😀"), "\"世界😀\""),
        ]);
    }

    #[test]
    fn integral_floats_drop_the_fraction() {
        assert_numbers(&[(1.0, "1"), (100.0, "100"), (-3.0, "-3")]);
    }

    #[test]
    fn fractional_floats_use_plain_notation_in_range() {
        assert_numbers(&[
            (0.5, "0.5"),
            (-3.25, "-3.25"),
            (1.5, "1.5"),
            (123.456, "123.456"),
        ]);
    }

    #[test]
    fn small_and_large_magnitudes_use_exponent_forms() {
        assert_numbers(&[
            (1e21, "1e+21"),
            (1e-7, "1e-7"),
            (1e-6, "0.000001"),
            (1e20, "100000000000000000000"),
        ]);
    }

    #[test]
    fn zero_and_non_finite() {
        assert_numbers(&[
            (0.0, "0"),
            (-0.0, "0"),
            (f64::NAN, "null"),
            (f64::INFINITY, "null"),
            (f64::NEG_INFINITY, "null"),
        ]);
    }

    #[test]
    fn canonical_payload_matches_node_and_python_byte_for_byte() {
        assert_eq!(canonical_json(&golden_value()), GOLDEN_PAYLOAD);
    }

    #[test]
    fn content_hash_matches_cross_language_golden() {
        let hash = content_hash(&golden_value());
        assert_eq!(hash, GOLDEN_HASH);
        assert_eq!(hash.len(), HASH_WIDTH);
    }
}