Skip to main content

aria_bsv_hasher/
lib.rs

1//! aria-bsv-hasher — Canonical JSON serialization and SHA-256 hashing.
2//!
3//! Implements the ARIA BRC-121 canonical JSON format, which is identical
4//! to the Python SDK's `canonical_json()`: object keys are sorted
5//! lexicographically, arrays preserve order, and output is UTF-8 bytes
6//! with no whitespace.
7//!
8//! # Example
9//! ```rust
10//! use aria_bsv_hasher::{hash_object, hash_bytes, prefixed_hash};
11//! use serde_json::json;
12//!
13//! let h = hash_object(&json!({"b": 2, "a": 1})).unwrap();
14//! assert_eq!(h.len(), 64);  // lowercase hex SHA-256
15//!
16//! // Same result as: hash_bytes(b"{\"a\":1,\"b\":2}")
17//! let direct = hash_bytes(b"{\"a\":1,\"b\":2}");
18//! assert_eq!(h, direct);
19//!
20//! assert_eq!(prefixed_hash(&h), format!("sha256:{}", h));
21//! ```
22
23use sha2::{Digest, Sha256};
24use serde_json::Value;
25
26// ---------------------------------------------------------------------------
27// Error type
28// ---------------------------------------------------------------------------
29
/// Failure modes of canonical JSON serialization and hashing.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum HasherError {
    /// A number was NaN or infinite and therefore has no canonical JSON form.
    ///
    /// The payload is a human-readable description of the offending value.
    NonFiniteFloat(String),
}
36
37impl std::fmt::Display for HasherError {
38    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39        match self {
40            HasherError::NonFiniteFloat(s) => write!(f, "canonical JSON error: {s}"),
41        }
42    }
43}
44
45impl std::error::Error for HasherError {}
46
47// ---------------------------------------------------------------------------
48// Canonical JSON
49// ---------------------------------------------------------------------------
50
51/// Serialize `v` to deterministic JSON bytes.
52///
53/// - Object keys are sorted lexicographically (UTF-8 byte order).
54/// - Arrays preserve insertion order.
55/// - No whitespace is added.
56/// - Numbers that cannot be represented (NaN, Infinity) return an error.
57pub fn canonical_json(v: &Value) -> Result<Vec<u8>, HasherError> {
58    let mut buf = Vec::with_capacity(128);
59    canonical_json_into(v, &mut buf)?;
60    Ok(buf)
61}
62
63fn canonical_json_into(v: &Value, buf: &mut Vec<u8>) -> Result<(), HasherError> {
64    match v {
65        Value::Null => buf.extend_from_slice(b"null"),
66        Value::Bool(b) => buf.extend_from_slice(if *b { b"true" } else { b"false" }),
67        Value::Number(n) => {
68            // serde_json's Number doesn't allow NaN/Infinity, but guard anyway.
69            if n.is_f64() {
70                let f = n.as_f64().unwrap();
71                if f.is_nan() || f.is_infinite() {
72                    return Err(HasherError::NonFiniteFloat(format!(
73                        "non-finite float: {f}"
74                    )));
75                }
76            }
77            buf.extend_from_slice(n.to_string().as_bytes());
78        }
79        Value::String(s) => {
80            // Use serde_json for correct JSON string escaping.
81            let json_str = serde_json::to_string(s).expect("serde_json string serialization");
82            buf.extend_from_slice(json_str.as_bytes());
83        }
84        Value::Array(arr) => {
85            buf.push(b'[');
86            for (i, item) in arr.iter().enumerate() {
87                if i > 0 {
88                    buf.push(b',');
89                }
90                canonical_json_into(item, buf)?;
91            }
92            buf.push(b']');
93        }
94        Value::Object(obj) => {
95            buf.push(b'{');
96            let mut keys: Vec<&str> = obj.keys().map(|k| k.as_str()).collect();
97            keys.sort_unstable();
98            for (i, key) in keys.iter().enumerate() {
99                if i > 0 {
100                    buf.push(b',');
101                }
102                let json_key = serde_json::to_string(key).expect("key serialization");
103                buf.extend_from_slice(json_key.as_bytes());
104                buf.push(b':');
105                canonical_json_into(&obj[*key], buf)?;
106            }
107            buf.push(b'}');
108        }
109    }
110    Ok(())
111}
112
113// ---------------------------------------------------------------------------
114// Hashing API
115// ---------------------------------------------------------------------------
116
117/// Hash raw bytes with SHA-256. Returns lowercase hex.
118pub fn hash_bytes(data: &[u8]) -> String {
119    let digest = Sha256::digest(data);
120    hex::encode(digest)
121}
122
123/// Hash a UTF-8 string with SHA-256. Returns lowercase hex.
124///
125/// Equivalent to `hash_bytes(s.as_bytes())`.
126pub fn hash_string(s: &str) -> String {
127    hash_bytes(s.as_bytes())
128}
129
130/// Canonicalize `v` and return SHA-256 hex of the canonical bytes.
131///
132/// Returns an error if `v` contains non-finite floats.
133pub fn hash_object(v: &Value) -> Result<String, HasherError> {
134    let bytes = canonical_json(v)?;
135    Ok(hash_bytes(&bytes))
136}
137
138/// Like [`hash_object`] but panics on error (useful in tests / infallible contexts).
139pub fn must_hash_object(v: &Value) -> String {
140    hash_object(v).expect("hash_object failed")
141}
142
/// Return `hash` with the literal prefix `sha256:` in front of it.
///
/// `hash` is copied through verbatim; no length or hex validation is done.
pub fn prefixed_hash(hash: &str) -> String {
    const PREFIX: &str = "sha256:";
    let mut out = String::with_capacity(PREFIX.len() + hash.len());
    out.push_str(PREFIX);
    out.push_str(hash);
    out
}
147
148/// Canonicalize and hash `v`, returning `"sha256:<hex>"`.
149pub fn hash_object_prefixed(v: &Value) -> Result<String, HasherError> {
150    Ok(prefixed_hash(&hash_object(v)?))
151}
152
/// Compare two hex digest strings, ignoring ASCII letter case.
///
/// Returns `true` when `a` and `b` have the same byte length and every byte
/// pair matches after ASCII case folding (`A`-`Z` treated as `a`-`z`).
/// Strings of differing lengths are never equal.
///
/// The inputs are **not** validated: they are not decoded, need not be valid
/// hex, and need not be 64 characters long. In particular two empty strings
/// compare equal.
///
/// Only ASCII case is folded. Unicode-aware lowercasing would let unrelated
/// code points compare equal (for example KELVIN SIGN U+212A lowercases to
/// `k`), which is never wanted for hex digests.
///
/// Apart from the early return on a length mismatch, every byte pair is
/// examined; the loop does not stop at the first difference.
pub fn equal(a: &str, b: &str) -> bool {
    let (lhs, rhs) = (a.as_bytes(), b.as_bytes());
    if lhs.len() != rhs.len() {
        return false;
    }
    // OR together the XOR of every folded byte pair; any mismatch leaves a
    // non-zero bit in the accumulator.
    let diff = lhs.iter().zip(rhs).fold(0u8, |acc, (x, y)| {
        acc | (x.to_ascii_lowercase() ^ y.to_ascii_lowercase())
    });
    diff == 0
}
170
171// ---------------------------------------------------------------------------
172// Tests
173// ---------------------------------------------------------------------------
174
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// SHA-256 of the empty input, lowercase hex.
    const SHA256_EMPTY: &str =
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
    /// SHA-256 of the ASCII bytes `abc`, lowercase hex.
    const SHA256_ABC: &str =
        "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    /// Canonicalize `value` and return the output as text so assertions can
    /// compare against plain string literals.
    fn canon(value: Value) -> String {
        let bytes = canonical_json(&value).unwrap();
        String::from_utf8(bytes).unwrap()
    }

    // -- canonical_json -------------------------------------------------------

    #[test]
    fn canonical_json_null() {
        assert_eq!(canon(Value::Null), "null");
    }

    #[test]
    fn canonical_json_bool_true() {
        assert_eq!(canon(json!(true)), "true");
    }

    #[test]
    fn canonical_json_bool_false() {
        assert_eq!(canon(json!(false)), "false");
    }

    #[test]
    fn canonical_json_integer() {
        assert_eq!(canon(json!(42)), "42");
    }

    #[test]
    fn canonical_json_float() {
        assert_eq!(canon(json!(1.5)), "1.5");
    }

    #[test]
    fn canonical_json_string() {
        assert_eq!(canon(json!("hello")), r#""hello""#);
    }

    #[test]
    fn canonical_json_string_with_escapes() {
        // The newline must come out as the two-character escape `\n`.
        assert_eq!(canon(json!("a\nb")), r#""a\nb""#);
    }

    #[test]
    fn canonical_json_array_preserves_order() {
        assert_eq!(canon(json!([3, 1, 2])), "[3,1,2]");
    }

    #[test]
    fn canonical_json_object_keys_sorted() {
        assert_eq!(canon(json!({"b": 2, "a": 1})), r#"{"a":1,"b":2}"#);
    }

    #[test]
    fn canonical_json_nested_object_keys_sorted() {
        let nested = json!({"z": {"b": 2, "a": 1}});
        assert_eq!(canon(nested), r#"{"z":{"a":1,"b":2}}"#);
    }

    #[test]
    fn canonical_json_empty_object() {
        assert_eq!(canon(json!({})), "{}");
    }

    #[test]
    fn canonical_json_empty_array() {
        assert_eq!(canon(json!([])), "[]");
    }

    // -- hash_bytes -----------------------------------------------------------

    #[test]
    fn hash_bytes_known_vector() {
        assert_eq!(hash_bytes(b""), SHA256_EMPTY);
    }

    #[test]
    fn hash_bytes_abc_vector() {
        assert_eq!(hash_bytes(b"abc"), SHA256_ABC);
    }

    #[test]
    fn hash_bytes_returns_64_char_hex() {
        let digest = hash_bytes(b"test");
        assert_eq!(digest.len(), 64);
        assert!(digest.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn hash_bytes_is_lowercase() {
        let digest = hash_bytes(b"test");
        assert_eq!(digest, digest.to_lowercase());
    }

    // -- hash_string ----------------------------------------------------------

    #[test]
    fn hash_string_equals_hash_bytes_of_utf8() {
        let text = "hello world";
        assert_eq!(hash_string(text), hash_bytes(text.as_bytes()));
    }

    #[test]
    fn hash_string_empty() {
        assert_eq!(hash_string(""), hash_bytes(b""));
    }

    // -- hash_object ----------------------------------------------------------

    #[test]
    fn hash_object_sorted_keys_matches_manual() {
        // Hashing the object must equal hashing its hand-written canonical form.
        let via_object = hash_object(&json!({"b": 2, "a": 1})).unwrap();
        let via_bytes = hash_bytes(br#"{"a":1,"b":2}"#);
        assert_eq!(via_object, via_bytes);
    }

    #[test]
    fn hash_object_is_deterministic() {
        let record = json!({"model": "gpt-4", "confidence": 0.95, "seq": 0});
        let first = hash_object(&record).unwrap();
        let second = hash_object(&record).unwrap();
        assert_eq!(first, second);
    }

    #[test]
    fn hash_object_null_value() {
        assert_eq!(hash_object(&Value::Null).unwrap(), hash_bytes(b"null"));
    }

    // -- prefixed_hash --------------------------------------------------------

    #[test]
    fn prefixed_hash_prepends_sha256_colon() {
        let digest = "a".repeat(64);
        let expected = ["sha256:", digest.as_str()].concat();
        assert_eq!(prefixed_hash(&digest), expected);
    }

    // -- hash_object_prefixed -------------------------------------------------

    #[test]
    fn hash_object_prefixed_starts_with_sha256() {
        let tagged = hash_object_prefixed(&json!({"x": 1})).unwrap();
        assert!(tagged.starts_with("sha256:"));
        assert_eq!(tagged.len(), "sha256:".len() + 64);
    }

    // -- equal ----------------------------------------------------------------

    #[test]
    fn equal_same_hash() {
        let digest = hash_bytes(b"test");
        assert!(equal(&digest, &digest));
    }

    #[test]
    fn equal_case_insensitive() {
        let lower = format!("aabbcc{}", "00".repeat(29));
        let upper = lower.to_uppercase();
        assert!(equal(&lower, &upper));
    }

    #[test]
    fn equal_different_hashes() {
        let (left, right) = (hash_bytes(b"a"), hash_bytes(b"b"));
        assert!(!equal(&left, &right));
    }

    #[test]
    fn equal_different_lengths() {
        assert!(!equal("abc", "abcd"));
    }

    #[test]
    fn equal_empty_strings() {
        assert!(equal("", ""));
    }

    // -- Cross-SDK BRC-121 test vector ----------------------------------------

    #[test]
    fn cross_sdk_canonical_json_vector() {
        // This vector must match the Python SDK and TypeScript SDK outputs.
        // Input:     {"model":"gpt-4","seq":0,"confidence":null}
        // Canonical: {"confidence":null,"model":"gpt-4","seq":0}  (keys sorted)
        let input = json!({"model": "gpt-4", "seq": 0, "confidence": null});
        assert_eq!(
            canon(input),
            r#"{"confidence":null,"model":"gpt-4","seq":0}"#
        );
    }
}