1use sha2::{Digest, Sha256};
24use serde_json::Value;
25
/// Errors that can occur while producing the canonical JSON form of a value.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum HasherError {
    /// A number was NaN or infinite. The payload is a human-readable description
    /// of the offending value.
    NonFiniteFloat(String),
}
36
37impl std::fmt::Display for HasherError {
38 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39 match self {
40 HasherError::NonFiniteFloat(s) => write!(f, "canonical JSON error: {s}"),
41 }
42 }
43}
44
45impl std::error::Error for HasherError {}
46
47pub fn canonical_json(v: &Value) -> Result<Vec<u8>, HasherError> {
58 let mut buf = Vec::with_capacity(128);
59 canonical_json_into(v, &mut buf)?;
60 Ok(buf)
61}
62
63fn canonical_json_into(v: &Value, buf: &mut Vec<u8>) -> Result<(), HasherError> {
64 match v {
65 Value::Null => buf.extend_from_slice(b"null"),
66 Value::Bool(b) => buf.extend_from_slice(if *b { b"true" } else { b"false" }),
67 Value::Number(n) => {
68 if n.is_f64() {
70 let f = n.as_f64().unwrap();
71 if f.is_nan() || f.is_infinite() {
72 return Err(HasherError::NonFiniteFloat(format!(
73 "non-finite float: {f}"
74 )));
75 }
76 }
77 buf.extend_from_slice(n.to_string().as_bytes());
78 }
79 Value::String(s) => {
80 let json_str = serde_json::to_string(s).expect("serde_json string serialization");
82 buf.extend_from_slice(json_str.as_bytes());
83 }
84 Value::Array(arr) => {
85 buf.push(b'[');
86 for (i, item) in arr.iter().enumerate() {
87 if i > 0 {
88 buf.push(b',');
89 }
90 canonical_json_into(item, buf)?;
91 }
92 buf.push(b']');
93 }
94 Value::Object(obj) => {
95 buf.push(b'{');
96 let mut keys: Vec<&str> = obj.keys().map(|k| k.as_str()).collect();
97 keys.sort_unstable();
98 for (i, key) in keys.iter().enumerate() {
99 if i > 0 {
100 buf.push(b',');
101 }
102 let json_key = serde_json::to_string(key).expect("key serialization");
103 buf.extend_from_slice(json_key.as_bytes());
104 buf.push(b':');
105 canonical_json_into(&obj[*key], buf)?;
106 }
107 buf.push(b'}');
108 }
109 }
110 Ok(())
111}
112
113pub fn hash_bytes(data: &[u8]) -> String {
119 let digest = Sha256::digest(data);
120 hex::encode(digest)
121}
122
123pub fn hash_string(s: &str) -> String {
127 hash_bytes(s.as_bytes())
128}
129
130pub fn hash_object(v: &Value) -> Result<String, HasherError> {
134 let bytes = canonical_json(v)?;
135 Ok(hash_bytes(&bytes))
136}
137
138pub fn must_hash_object(v: &Value) -> String {
140 hash_object(v).expect("hash_object failed")
141}
142
/// Returns `hash` with the literal prefix `sha256:` prepended.
///
/// The input is not validated or altered in any way.
pub fn prefixed_hash(hash: &str) -> String {
    ["sha256:", hash].concat()
}
147
148pub fn hash_object_prefixed(v: &Value) -> Result<String, HasherError> {
150 Ok(prefixed_hash(&hash_object(v)?))
151}
152
/// Compares two hash strings for equality, ignoring ASCII letter case.
///
/// Inputs whose byte lengths differ are never equal. For equal-length inputs every
/// byte pair is examined and the differences are OR-ed into a single accumulator, so
/// the scan does not stop at the first mismatch.
///
/// Only the ASCII letters `A`–`Z` are case-folded. Full Unicode lowercasing is
/// deliberately avoided: it would make non-hex look-alikes such as U+212A (KELVIN
/// SIGN) compare equal to `k`, and it would allocate a new `String` per argument.
pub fn equal(a: &str, b: &str) -> bool {
    let (lhs, rhs) = (a.as_bytes(), b.as_bytes());
    if lhs.len() != rhs.len() {
        return false;
    }
    // Accumulate the XOR of each case-folded byte pair; any mismatch sets a bit.
    let diff = lhs
        .iter()
        .zip(rhs)
        .fold(0u8, |acc, (x, y)| acc | (x.to_ascii_lowercase() ^ y.to_ascii_lowercase()));
    diff == 0
}
170
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Canonicalizes `v`, panicking if canonicalization fails.
    fn canon(v: Value) -> Vec<u8> {
        canonical_json(&v).unwrap()
    }

    // ---- canonical_json: scalars ----

    #[test]
    fn canonical_json_null() {
        assert_eq!(canon(Value::Null), b"null");
    }

    #[test]
    fn canonical_json_bool_true() {
        assert_eq!(canon(json!(true)), b"true");
    }

    #[test]
    fn canonical_json_bool_false() {
        assert_eq!(canon(json!(false)), b"false");
    }

    #[test]
    fn canonical_json_integer() {
        assert_eq!(canon(json!(42)), b"42");
    }

    #[test]
    fn canonical_json_float() {
        assert_eq!(canon(json!(1.5)), b"1.5");
    }

    #[test]
    fn canonical_json_string() {
        assert_eq!(canon(json!("hello")), b"\"hello\"");
    }

    #[test]
    fn canonical_json_string_with_escapes() {
        // The newline must come out as the two-character escape sequence `\n`.
        let bytes = canon(json!("a\nb"));
        assert_eq!(bytes, b"\"a\\nb\"");
    }

    // ---- canonical_json: containers ----

    #[test]
    fn canonical_json_array_preserves_order() {
        let bytes = canon(json!([3, 1, 2]));
        assert_eq!(bytes, b"[3,1,2]");
    }

    #[test]
    fn canonical_json_object_keys_sorted() {
        let bytes = canon(json!({"b": 2, "a": 1}));
        assert_eq!(bytes, b"{\"a\":1,\"b\":2}");
    }

    #[test]
    fn canonical_json_nested_object_keys_sorted() {
        let bytes = canon(json!({"z": {"b": 2, "a": 1}}));
        assert_eq!(bytes, b"{\"z\":{\"a\":1,\"b\":2}}");
    }

    #[test]
    fn canonical_json_empty_object() {
        assert_eq!(canon(json!({})), b"{}");
    }

    #[test]
    fn canonical_json_empty_array() {
        assert_eq!(canon(json!([])), b"[]");
    }

    // ---- hash_bytes ----

    #[test]
    fn hash_bytes_known_vector() {
        // SHA-256 of the empty input.
        let digest = hash_bytes(b"");
        assert_eq!(
            digest,
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }

    #[test]
    fn hash_bytes_abc_vector() {
        // SHA-256 of the ASCII string "abc".
        let digest = hash_bytes(b"abc");
        assert_eq!(
            digest,
            "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
        );
    }

    #[test]
    fn hash_bytes_returns_64_char_hex() {
        let digest = hash_bytes(b"test");
        assert_eq!(digest.len(), 64);
        assert!(digest.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn hash_bytes_is_lowercase() {
        let digest = hash_bytes(b"test");
        let lowered = digest.to_lowercase();
        assert_eq!(digest, lowered);
    }

    // ---- hash_string ----

    #[test]
    fn hash_string_equals_hash_bytes_of_utf8() {
        let text = "hello world";
        let from_bytes = hash_bytes(text.as_bytes());
        assert_eq!(hash_string(text), from_bytes);
    }

    #[test]
    fn hash_string_empty() {
        assert_eq!(hash_string(""), hash_bytes(b""));
    }

    // ---- hash_object ----

    #[test]
    fn hash_object_sorted_keys_matches_manual() {
        let actual = hash_object(&json!({"b": 2, "a": 1})).unwrap();
        let expected = hash_bytes(b"{\"a\":1,\"b\":2}");
        assert_eq!(actual, expected);
    }

    #[test]
    fn hash_object_is_deterministic() {
        let value = json!({"model": "gpt-4", "confidence": 0.95, "seq": 0});
        let first = hash_object(&value).unwrap();
        let second = hash_object(&value).unwrap();
        assert_eq!(first, second);
    }

    #[test]
    fn hash_object_null_value() {
        let digest = hash_object(&Value::Null).unwrap();
        assert_eq!(digest, hash_bytes(b"null"));
    }

    // ---- prefixed hashes ----

    #[test]
    fn prefixed_hash_prepends_sha256_colon() {
        let digest = "a".repeat(64);
        let expected = format!("sha256:{digest}");
        assert_eq!(prefixed_hash(&digest), expected);
    }

    #[test]
    fn hash_object_prefixed_starts_with_sha256() {
        let prefixed = hash_object_prefixed(&json!({"x": 1})).unwrap();
        assert!(prefixed.starts_with("sha256:"));
        assert_eq!(prefixed.len(), 7 + 64);
    }

    // ---- equal ----

    #[test]
    fn equal_same_hash() {
        let digest = hash_bytes(b"test");
        assert!(equal(&digest, &digest));
    }

    #[test]
    fn equal_case_insensitive() {
        let lower = format!("aabbcc{}", "00".repeat(29));
        let upper = lower.to_uppercase();
        assert!(equal(&lower, &upper));
    }

    #[test]
    fn equal_different_hashes() {
        let left = hash_bytes(b"a");
        let right = hash_bytes(b"b");
        assert!(!equal(&left, &right));
    }

    #[test]
    fn equal_different_lengths() {
        assert!(!equal("abc", "abcd"));
    }

    #[test]
    fn equal_empty_strings() {
        assert!(equal("", ""));
    }

    // ---- cross-SDK vector ----

    #[test]
    fn cross_sdk_canonical_json_vector() {
        let value = json!({"model": "gpt-4", "seq": 0, "confidence": null});
        let bytes = canon(value);
        assert_eq!(
            bytes,
            b"{\"confidence\":null,\"model\":\"gpt-4\",\"seq\":0}"
        );
    }
}