use sha2::{Digest, Sha256};
use serde_json::Value;
/// Errors that can occur while producing the canonical JSON encoding of a value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HasherError {
    /// A floating-point number was NaN or infinite. The payload is a
    /// human-readable description of the offending value.
    NonFiniteFloat(String),
}

impl std::fmt::Display for HasherError {
    /// Renders the error as `canonical JSON error: <detail>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pull the descriptive payload out of whichever variant we hold; the
        // match stays exhaustive so a new variant forces a decision here.
        let detail = match self {
            Self::NonFiniteFloat(detail) => detail,
        };
        write!(f, "canonical JSON error: {detail}")
    }
}

impl std::error::Error for HasherError {}
/// Serializes `v` into its canonical JSON byte representation.
///
/// The encoding is produced by [`canonical_json_into`] into a freshly
/// allocated buffer (initial capacity 128 bytes).
///
/// # Errors
///
/// Returns whatever [`HasherError`] the recursive encoder reports.
pub fn canonical_json(v: &Value) -> Result<Vec<u8>, HasherError> {
    let mut out: Vec<u8> = Vec::with_capacity(128);
    // Hand the filled buffer back only if encoding succeeded.
    canonical_json_into(v, &mut out).map(|()| out)
}
/// Appends the canonical JSON encoding of `v` to `buf`, recursing into
/// arrays and objects.
///
/// * `null` / booleans are written as their JSON literals.
/// * Numbers are written using `Number`'s `to_string` output.
/// * Strings and object keys are escaped via `serde_json::to_string`.
/// * Arrays keep their element order.
/// * Object members are emitted sorted by key (`String` ordering), with no
///   whitespace between tokens.
///
/// # Errors
///
/// Returns [`HasherError::NonFiniteFloat`] if a float number is NaN or
/// infinite. On error `buf` may already contain a partial encoding.
fn canonical_json_into(v: &Value, buf: &mut Vec<u8>) -> Result<(), HasherError> {
    match v {
        Value::Null => buf.extend_from_slice(b"null"),
        Value::Bool(flag) => {
            let literal: &[u8] = if *flag { b"true" } else { b"false" };
            buf.extend_from_slice(literal);
        }
        Value::Number(num) => {
            if num.is_f64() {
                // `is_f64` was just checked, so `as_f64` is expected to be `Some`.
                let float = num.as_f64().unwrap();
                if !float.is_finite() {
                    let detail = format!("non-finite float: {float}");
                    return Err(HasherError::NonFiniteFloat(detail));
                }
            }
            let rendered = num.to_string();
            buf.extend_from_slice(rendered.as_bytes());
        }
        Value::String(text) => {
            let quoted = serde_json::to_string(text).expect("serde_json string serialization");
            buf.extend_from_slice(quoted.as_bytes());
        }
        Value::Array(items) => {
            buf.push(b'[');
            let mut elements = items.iter();
            // Emit the first element bare, then every later one behind a comma.
            if let Some(head) = elements.next() {
                canonical_json_into(head, buf)?;
                for element in elements {
                    buf.push(b',');
                    canonical_json_into(element, buf)?;
                }
            }
            buf.push(b']');
        }
        Value::Object(map) => {
            // Sort the (key, value) pairs together so no second lookup by key
            // is needed while writing.
            let mut entries: Vec<(&String, &Value)> = map.iter().collect();
            entries.sort_unstable_by(|left, right| left.0.cmp(right.0));

            buf.push(b'{');
            for (index, (key, value)) in entries.into_iter().enumerate() {
                if index != 0 {
                    buf.push(b',');
                }
                let quoted_key = serde_json::to_string(key).expect("key serialization");
                buf.extend_from_slice(quoted_key.as_bytes());
                buf.push(b':');
                canonical_json_into(value, buf)?;
            }
            buf.push(b'}');
        }
    }
    Ok(())
}
/// Computes the SHA-256 digest of `data` and returns it hex-encoded.
pub fn hash_bytes(data: &[u8]) -> String {
    // Incremental form of the one-shot digest: feed all bytes, then finalize.
    let mut hasher = Sha256::new();
    hasher.update(data);
    hex::encode(hasher.finalize())
}
/// Hashes the UTF-8 bytes of `s` with [`hash_bytes`].
pub fn hash_string(s: &str) -> String {
    let utf8: &[u8] = s.as_bytes();
    hash_bytes(utf8)
}
/// Hashes the canonical JSON encoding of `v`.
///
/// # Errors
///
/// Propagates any [`HasherError`] raised by [`canonical_json`].
pub fn hash_object(v: &Value) -> Result<String, HasherError> {
    canonical_json(v).map(|encoded| hash_bytes(&encoded))
}
/// Like [`hash_object`], but panics instead of returning an error.
///
/// # Panics
///
/// Panics with the message `hash_object failed` if [`hash_object`] returns
/// an error.
pub fn must_hash_object(v: &Value) -> String {
    let outcome = hash_object(v);
    outcome.expect("hash_object failed")
}
/// Returns `hash` with the literal prefix `sha256:` prepended.
///
/// The input is not validated; any string is accepted and prefixed as-is.
pub fn prefixed_hash(hash: &str) -> String {
    ["sha256:", hash].concat()
}
/// Hashes `v` with [`hash_object`] and prepends the `sha256:` prefix via
/// [`prefixed_hash`].
///
/// # Errors
///
/// Propagates any [`HasherError`] raised by [`hash_object`].
pub fn hash_object_prefixed(v: &Value) -> Result<String, HasherError> {
    hash_object(v).map(|digest| prefixed_hash(&digest))
}
/// Compares two hash strings, ignoring ASCII letter case.
///
/// Strings of different byte length are never equal and are rejected up
/// front. For equal-length inputs every byte pair is examined and the
/// differences are OR-ed together, so the loop does not stop at the first
/// mismatch.
///
/// Case folding is ASCII-only and done per byte, without allocating. Only
/// `A`–`Z` are folded onto `a`–`z`; non-ASCII characters must match exactly,
/// so Unicode look-alikes such as U+212A KELVIN SIGN are not treated as
/// equal to their ASCII counterparts.
pub fn equal(a: &str, b: &str) -> bool {
    if a.len() != b.len() {
        return false;
    }
    let diff = a
        .bytes()
        .zip(b.bytes())
        .fold(0u8, |acc, (x, y)| {
            acc | (x.to_ascii_lowercase() ^ y.to_ascii_lowercase())
        });
    diff == 0
}
// Unit tests for canonical JSON encoding, SHA-256 hashing helpers, the
// `sha256:` prefix helper, and the case-insensitive `equal` comparison.
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// --- canonical_json: scalar values ---------------------------------------
#[test]
fn canonical_json_null() {
assert_eq!(canonical_json(&Value::Null).unwrap(), b"null");
}
#[test]
fn canonical_json_bool_true() {
assert_eq!(canonical_json(&json!(true)).unwrap(), b"true");
}
#[test]
fn canonical_json_bool_false() {
assert_eq!(canonical_json(&json!(false)).unwrap(), b"false");
}
#[test]
fn canonical_json_integer() {
assert_eq!(canonical_json(&json!(42)).unwrap(), b"42");
}
#[test]
fn canonical_json_float() {
assert_eq!(canonical_json(&json!(1.5)).unwrap(), b"1.5");
}
#[test]
fn canonical_json_string() {
assert_eq!(canonical_json(&json!("hello")).unwrap(), b"\"hello\"");
}
// A newline inside the string must come out as the two-character escape `\n`.
#[test]
fn canonical_json_string_with_escapes() {
let result = canonical_json(&json!("a\nb")).unwrap();
assert_eq!(result, b"\"a\\nb\"");
}
// --- canonical_json: containers ------------------------------------------
// Arrays are emitted in their original element order (no sorting).
#[test]
fn canonical_json_array_preserves_order() {
let result = canonical_json(&json!([3, 1, 2])).unwrap();
assert_eq!(result, b"[3,1,2]");
}
// Object members are emitted sorted by key, without whitespace.
#[test]
fn canonical_json_object_keys_sorted() {
let result = canonical_json(&json!({"b": 2, "a": 1})).unwrap();
assert_eq!(result, b"{\"a\":1,\"b\":2}");
}
// Key sorting also applies to objects nested inside other objects.
#[test]
fn canonical_json_nested_object_keys_sorted() {
let result = canonical_json(&json!({"z": {"b": 2, "a": 1}})).unwrap();
assert_eq!(result, b"{\"z\":{\"a\":1,\"b\":2}}");
}
#[test]
fn canonical_json_empty_object() {
assert_eq!(canonical_json(&json!({})).unwrap(), b"{}");
}
#[test]
fn canonical_json_empty_array() {
assert_eq!(canonical_json(&json!([])).unwrap(), b"[]");
}
// --- hash_bytes ------------------------------------------------------------
// SHA-256 of the empty input.
#[test]
fn hash_bytes_known_vector() {
let h = hash_bytes(b"");
assert_eq!(h, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
}
// SHA-256 of the ASCII bytes "abc".
#[test]
fn hash_bytes_abc_vector() {
let h = hash_bytes(b"abc");
assert_eq!(h, "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad");
}
#[test]
fn hash_bytes_returns_64_char_hex() {
let h = hash_bytes(b"test");
assert_eq!(h.len(), 64);
assert!(h.chars().all(|c| c.is_ascii_hexdigit()));
}
#[test]
fn hash_bytes_is_lowercase() {
let h = hash_bytes(b"test");
assert_eq!(h, h.to_lowercase());
}
// --- hash_string -----------------------------------------------------------
#[test]
fn hash_string_equals_hash_bytes_of_utf8() {
let s = "hello world";
assert_eq!(hash_string(s), hash_bytes(s.as_bytes()));
}
#[test]
fn hash_string_empty() {
assert_eq!(hash_string(""), hash_bytes(b""));
}
// --- hash_object -----------------------------------------------------------
// Hashing an object must equal hashing its hand-written canonical encoding.
#[test]
fn hash_object_sorted_keys_matches_manual() {
let expected = hash_bytes(b"{\"a\":1,\"b\":2}");
let result = hash_object(&json!({"b": 2, "a": 1})).unwrap();
assert_eq!(result, expected);
}
#[test]
fn hash_object_is_deterministic() {
let v = json!({"model": "gpt-4", "confidence": 0.95, "seq": 0});
let h1 = hash_object(&v).unwrap();
let h2 = hash_object(&v).unwrap();
assert_eq!(h1, h2);
}
#[test]
fn hash_object_null_value() {
let h = hash_object(&Value::Null).unwrap();
assert_eq!(h, hash_bytes(b"null"));
}
// --- prefixed_hash / hash_object_prefixed ----------------------------------
#[test]
fn prefixed_hash_prepends_sha256_colon() {
let h = "a".repeat(64);
assert_eq!(prefixed_hash(&h), format!("sha256:{h}"));
}
// 7 is the length of the "sha256:" prefix; 64 is the hex digest length.
#[test]
fn hash_object_prefixed_starts_with_sha256() {
let result = hash_object_prefixed(&json!({"x": 1})).unwrap();
assert!(result.starts_with("sha256:"));
assert_eq!(result.len(), 7 + 64);
}
// --- equal -------------------------------------------------------------------
#[test]
fn equal_same_hash() {
let h = hash_bytes(b"test");
assert!(equal(&h, &h));
}
// Builds a 64-character hex string and compares it with its uppercase form.
#[test]
fn equal_case_insensitive() {
let lower = "aabbcc".to_string() + &"00".repeat(29);
let upper = lower.to_uppercase();
assert!(equal(&lower, &upper));
}
#[test]
fn equal_different_hashes() {
let h1 = hash_bytes(b"a");
let h2 = hash_bytes(b"b");
assert!(!equal(&h1, &h2));
}
#[test]
fn equal_different_lengths() {
assert!(!equal("abc", "abcd"));
}
#[test]
fn equal_empty_strings() {
assert!(equal("", ""));
}
// --- cross-implementation vector -------------------------------------------
// Fixed input/output pair for the canonical encoding: keys sorted, `null`
// kept, no whitespace. The test name suggests other SDKs are expected to
// produce the same bytes; that cannot be confirmed from this file.
#[test]
fn cross_sdk_canonical_json_vector() {
let v = json!({"model": "gpt-4", "seq": 0, "confidence": null});
let canon = canonical_json(&v).unwrap();
assert_eq!(canon, b"{\"confidence\":null,\"model\":\"gpt-4\",\"seq\":0}");
}
}