Skip to main content

packr_abi/
hash.rs

1//! Merkle-tree hashing for Pack types.
2//!
3//! Types are hashed structurally - field names are included, type names are not.
4//! Names are bindings at the interface level, not part of the type's identity.
5//!
6//! # Design
7//!
8//! - Primitives have fixed hashes (constants)
9//! - Compound types hash their components: `hash(list<T>) = hash("list", hash(T))`
10//! - Records hash their fields: `hash({ x: s32, y: s32 }) = hash("record", [("x", hash(s32)), ("y", hash(s32))])`
11//! - Type names are NOT included - `Point` and `Vec2` with same structure have same hash
12//! - Interface bindings include names: `("Point", type_hash)`
13//!
14//! This creates a Merkle tree where:
15//! - Structural sharing is natural (same structure = same hash)
16//! - Type compatibility is O(1) hash comparison
17//! - Interfaces can bind different names to the same type hash
18
19use sha2::{Digest, Sha256};
20
21#[cfg(feature = "std")]
22use alloc::string::String;
23
24/// A 256-bit type hash.
25#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
26pub struct TypeHash([u8; 32]);
27
28impl TypeHash {
29    /// Create a TypeHash from raw bytes.
30    pub const fn from_bytes(bytes: [u8; 32]) -> Self {
31        Self(bytes)
32    }
33
34    /// Get the raw bytes.
35    pub fn as_bytes(&self) -> &[u8; 32] {
36        &self.0
37    }
38
39    /// Convert to a tuple of 4 u64s (for WASM-friendly representation).
40    pub fn to_u64s(&self) -> (u64, u64, u64, u64) {
41        let b = &self.0;
42        (
43            u64::from_le_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]),
44            u64::from_le_bytes([b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]]),
45            u64::from_le_bytes([b[16], b[17], b[18], b[19], b[20], b[21], b[22], b[23]]),
46            u64::from_le_bytes([b[24], b[25], b[26], b[27], b[28], b[29], b[30], b[31]]),
47        )
48    }
49
50    /// Create from a tuple of 4 u64s.
51    pub fn from_u64s(a: u64, b: u64, c: u64, d: u64) -> Self {
52        let mut bytes = [0u8; 32];
53        bytes[0..8].copy_from_slice(&a.to_le_bytes());
54        bytes[8..16].copy_from_slice(&b.to_le_bytes());
55        bytes[16..24].copy_from_slice(&c.to_le_bytes());
56        bytes[24..32].copy_from_slice(&d.to_le_bytes());
57        Self(bytes)
58    }
59
60    /// Format as hex string (for debugging).
61    #[cfg(feature = "std")]
62    pub fn to_hex(&self) -> String {
63        use core::fmt::Write;
64        let mut s = String::with_capacity(self.0.len() * 2);
65        for b in &self.0 {
66            let _ = write!(s, "{:02x}", b);
67        }
68        s
69    }
70}
71
72// ============================================================================
73// Primitive Type Hashes (fixed constants)
74// ============================================================================
75
76// These are computed as SHA-256 of the type name, but we store them as constants
77// for efficiency. The actual values don't matter as long as they're unique.
78
79/// Hash for the `bool` type.
80pub const HASH_BOOL: TypeHash = TypeHash::from_bytes([
81    0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
82    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
83]);
84
85/// Hash for the `u8` type.
86pub const HASH_U8: TypeHash = TypeHash::from_bytes([
87    0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
88    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
89]);
90
91/// Hash for the `u16` type.
92pub const HASH_U16: TypeHash = TypeHash::from_bytes([
93    0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
94    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
95]);
96
97/// Hash for the `u32` type.
98pub const HASH_U32: TypeHash = TypeHash::from_bytes([
99    0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
100    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
101]);
102
103/// Hash for the `u64` type.
104pub const HASH_U64: TypeHash = TypeHash::from_bytes([
105    0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
106    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
107]);
108
109/// Hash for the `s8` type.
110pub const HASH_S8: TypeHash = TypeHash::from_bytes([
111    0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
112    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
113]);
114
115/// Hash for the `s16` type.
116pub const HASH_S16: TypeHash = TypeHash::from_bytes([
117    0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
118    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
119]);
120
121/// Hash for the `s32` type.
122pub const HASH_S32: TypeHash = TypeHash::from_bytes([
123    0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
124    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
125]);
126
127/// Hash for the `s64` type.
128pub const HASH_S64: TypeHash = TypeHash::from_bytes([
129    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
130    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
131]);
132
133/// Hash for the `f32` type.
134pub const HASH_F32: TypeHash = TypeHash::from_bytes([
135    0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
136    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
137]);
138
139/// Hash for the `f64` type.
140pub const HASH_F64: TypeHash = TypeHash::from_bytes([
141    0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
142    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
143]);
144
145/// Hash for the `char` type.
146pub const HASH_CHAR: TypeHash = TypeHash::from_bytes([
147    0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
148    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
149]);
150
151/// Hash for the `string` type.
152pub const HASH_STRING: TypeHash = TypeHash::from_bytes([
153    0x00, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
154    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
155]);
156
157/// Hash for the `flags` type.
158pub const HASH_FLAGS: TypeHash = TypeHash::from_bytes([
159    0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
160    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
161]);
162
163// ============================================================================
164// Hash Builder
165// ============================================================================
166
167/// Builder for computing type hashes.
168pub struct TypeHasher {
169    hasher: Sha256,
170}
171
172impl TypeHasher {
173    /// Create a new hasher.
174    pub fn new() -> Self {
175        Self {
176            hasher: Sha256::new(),
177        }
178    }
179
180    /// Add a tag byte (identifies the kind of construct being hashed).
181    pub fn tag(mut self, tag: u8) -> Self {
182        self.hasher.update([tag]);
183        self
184    }
185
186    /// Add a string (length-prefixed).
187    pub fn string(mut self, s: &str) -> Self {
188        self.hasher.update((s.len() as u32).to_le_bytes());
189        self.hasher.update(s.as_bytes());
190        self
191    }
192
193    /// Add a child type hash.
194    pub fn child(mut self, hash: &TypeHash) -> Self {
195        self.hasher.update(hash.as_bytes());
196        self
197    }
198
199    /// Add a count (for lists of children).
200    pub fn count(mut self, n: usize) -> Self {
201        self.hasher.update((n as u32).to_le_bytes());
202        self
203    }
204
205    /// Finalize and return the hash.
206    pub fn finish(self) -> TypeHash {
207        let result = self.hasher.finalize();
208        let mut bytes = [0u8; 32];
209        bytes.copy_from_slice(&result);
210        TypeHash(bytes)
211    }
212}
213
214impl Default for TypeHasher {
215    fn default() -> Self {
216        Self::new()
217    }
218}
219
220// ============================================================================
221// Hash Tags (identifies type constructors)
222// ============================================================================
223
224const TAG_LIST: u8 = 0x10;
225const TAG_OPTION: u8 = 0x11;
226const TAG_RESULT: u8 = 0x12;
227const TAG_TUPLE: u8 = 0x13;
228const TAG_RECORD: u8 = 0x14;
229const TAG_VARIANT: u8 = 0x15;
230const TAG_FUNCTION: u8 = 0x16;
231const TAG_INTERFACE: u8 = 0x17;
232
233// ============================================================================
234// Compound Type Hashing
235// ============================================================================
236
237/// Hash a list type: `list<T>`.
238pub fn hash_list(element: &TypeHash) -> TypeHash {
239    TypeHasher::new().tag(TAG_LIST).child(element).finish()
240}
241
242/// Hash an option type: `option<T>`.
243pub fn hash_option(inner: &TypeHash) -> TypeHash {
244    TypeHasher::new().tag(TAG_OPTION).child(inner).finish()
245}
246
247/// Hash a result type: `result<T, E>`.
248pub fn hash_result(ok: &TypeHash, err: &TypeHash) -> TypeHash {
249    TypeHasher::new()
250        .tag(TAG_RESULT)
251        .child(ok)
252        .child(err)
253        .finish()
254}
255
256/// Hash a tuple type: `tuple<T1, T2, ...>`.
257pub fn hash_tuple(elements: &[TypeHash]) -> TypeHash {
258    let mut hasher = TypeHasher::new().tag(TAG_TUPLE).count(elements.len());
259
260    for elem in elements {
261        hasher = hasher.child(elem);
262    }
263
264    hasher.finish()
265}
266
267/// Hash a record type (structural - name NOT included).
268/// Fields should be in canonical order (sorted by name).
269pub fn hash_record(fields: &[(&str, TypeHash)]) -> TypeHash {
270    let mut hasher = TypeHasher::new().tag(TAG_RECORD).count(fields.len());
271
272    for (name, type_hash) in fields {
273        hasher = hasher.string(name).child(type_hash);
274    }
275
276    hasher.finish()
277}
278
279/// Hash a variant type (structural - name NOT included).
280/// Cases should be in canonical order (sorted by name).
281pub fn hash_variant(cases: &[(&str, Option<TypeHash>)]) -> TypeHash {
282    let mut hasher = TypeHasher::new().tag(TAG_VARIANT).count(cases.len());
283
284    for (name, payload) in cases {
285        hasher = hasher.string(name);
286        if let Some(type_hash) = payload {
287            hasher = hasher.tag(1).child(type_hash);
288        } else {
289            hasher = hasher.tag(0);
290        }
291    }
292
293    hasher.finish()
294}
295
296/// Hash a function signature.
297/// Param names are NOT included (just types in order).
298/// Result types are included in order.
299pub fn hash_function(params: &[TypeHash], results: &[TypeHash]) -> TypeHash {
300    let mut hasher = TypeHasher::new().tag(TAG_FUNCTION).count(params.len());
301
302    for param in params {
303        hasher = hasher.child(param);
304    }
305
306    hasher = hasher.count(results.len());
307    for result in results {
308        hasher = hasher.child(result);
309    }
310
311    hasher.finish()
312}
313
314/// An interface binding: name -> hash.
315pub struct Binding<'a> {
316    pub name: &'a str,
317    pub hash: TypeHash,
318}
319
320/// Hash an interface (includes binding names).
321/// Bindings should be in canonical order (sorted by name).
322pub fn hash_interface(
323    name: &str,
324    type_bindings: &[Binding<'_>],
325    func_bindings: &[Binding<'_>],
326) -> TypeHash {
327    let mut hasher = TypeHasher::new()
328        .tag(TAG_INTERFACE)
329        .string(name)
330        .count(type_bindings.len());
331
332    for binding in type_bindings {
333        hasher = hasher.string(binding.name).child(&binding.hash);
334    }
335
336    hasher = hasher.count(func_bindings.len());
337    for binding in func_bindings {
338        hasher = hasher.string(binding.name).child(&binding.hash);
339    }
340
341    hasher.finish()
342}
343
344// ============================================================================
345// Recursive Type Hashing
346// ============================================================================
347
348/// Placeholder for self-references in recursive types.
349///
350/// When hashing `variant sexpr { lst(list<sexpr>) }`:
351/// 1. First pass: use HASH_SELF_REF for the recursive reference
352/// 2. This produces a "template hash"
353/// 3. The template hash IS the final hash (self-reference is structural)
354pub const HASH_SELF_REF: TypeHash = TypeHash::from_bytes([
355    0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
356    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
357]);
358
359#[cfg(test)]
360mod tests {
361    use super::*;
362
363    #[test]
364    fn test_primitive_hashes_are_unique() {
365        let primitives = [
366            HASH_BOOL,
367            HASH_U8,
368            HASH_U16,
369            HASH_U32,
370            HASH_U64,
371            HASH_S8,
372            HASH_S16,
373            HASH_S32,
374            HASH_S64,
375            HASH_F32,
376            HASH_F64,
377            HASH_CHAR,
378            HASH_STRING,
379            HASH_FLAGS,
380        ];
381
382        for (i, a) in primitives.iter().enumerate() {
383            for (j, b) in primitives.iter().enumerate() {
384                if i != j {
385                    assert_ne!(a, b, "primitive hashes must be unique");
386                }
387            }
388        }
389    }
390
391    #[test]
392    fn test_list_hash() {
393        let list_s32 = hash_list(&HASH_S32);
394        let list_s64 = hash_list(&HASH_S64);
395
396        assert_ne!(list_s32, list_s64);
397        assert_ne!(list_s32, HASH_S32); // list<s32> != s32
398    }
399
400    #[test]
401    fn test_record_hash_is_structural() {
402        // Same structure, would have different names in source
403        let point = hash_record(&[("x", HASH_S32), ("y", HASH_S32)]);
404        let vec2 = hash_record(&[("x", HASH_S32), ("y", HASH_S32)]);
405
406        assert_eq!(point, vec2, "same structure = same hash");
407    }
408
409    #[test]
410    fn test_record_hash_includes_field_names() {
411        let xy = hash_record(&[("x", HASH_S32), ("y", HASH_S32)]);
412        let ab = hash_record(&[("a", HASH_S32), ("b", HASH_S32)]);
413
414        assert_ne!(xy, ab, "different field names = different hash");
415    }
416
417    #[test]
418    fn test_tuple_vs_record() {
419        let tuple = hash_tuple(&[HASH_S32, HASH_S32]);
420        let record = hash_record(&[("x", HASH_S32), ("y", HASH_S32)]);
421
422        assert_ne!(tuple, record, "tuple != record");
423    }
424
425    #[test]
426    fn test_function_hash() {
427        let add = hash_function(&[HASH_S32, HASH_S32], &[HASH_S32]);
428        let sub = hash_function(&[HASH_S32, HASH_S32], &[HASH_S32]);
429
430        // Same signature = same hash (function names are bindings, not part of hash)
431        assert_eq!(add, sub);
432    }
433
434    #[test]
435    fn test_interface_includes_names() {
436        let iface_a = hash_interface(
437            "math",
438            &[],
439            &[Binding {
440                name: "add",
441                hash: hash_function(&[HASH_S32], &[HASH_S32]),
442            }],
443        );
444
445        let iface_b = hash_interface(
446            "math",
447            &[],
448            &[Binding {
449                name: "inc",
450                hash: hash_function(&[HASH_S32], &[HASH_S32]),
451            }],
452        );
453
454        // Different binding names = different interface hash
455        assert_ne!(iface_a, iface_b);
456    }
457
458    #[test]
459    fn test_u64_roundtrip() {
460        let hash = hash_list(&HASH_S32);
461        let (a, b, c, d) = hash.to_u64s();
462        let back = TypeHash::from_u64s(a, b, c, d);
463
464        assert_eq!(hash, back);
465    }
466}