Skip to main content

schema/
hash.rs

1//! Deterministic schema hashing.
2
3use blake3::Hasher;
4
5use crate::{ChangePolicy, FieldCodec, FixedPoint, Schema};
6
7/// Computes a deterministic hash for schema validation.
8#[must_use]
9pub fn schema_hash(schema: &Schema) -> u64 {
10    let mut hasher = Hasher::new();
11    write_u32(&mut hasher, schema.components.len() as u32);
12
13    for component in &schema.components {
14        write_u16(&mut hasher, component.id.get());
15        write_u32(&mut hasher, component.fields.len() as u32);
16
17        for field in &component.fields {
18            write_u16(&mut hasher, field.id.get());
19            write_codec(&mut hasher, field.codec);
20            write_change_policy(&mut hasher, field.change);
21        }
22    }
23
24    let hash = hasher.finalize();
25    let bytes = hash.as_bytes();
26    u64::from_le_bytes(bytes[0..8].try_into().unwrap())
27}
28
29fn write_codec(hasher: &mut Hasher, codec: FieldCodec) {
30    match codec {
31        FieldCodec::Bool => {
32            write_u8(hasher, 0);
33        }
34        FieldCodec::UInt { bits } => {
35            write_u8(hasher, 1);
36            write_u8(hasher, bits);
37        }
38        FieldCodec::SInt { bits } => {
39            write_u8(hasher, 2);
40            write_u8(hasher, bits);
41        }
42        FieldCodec::VarUInt => {
43            write_u8(hasher, 3);
44        }
45        FieldCodec::VarSInt => {
46            write_u8(hasher, 4);
47        }
48        FieldCodec::FixedPoint(fp) => {
49            write_u8(hasher, 5);
50            write_fixed_point(hasher, fp);
51        }
52    }
53}
54
55fn write_change_policy(hasher: &mut Hasher, policy: ChangePolicy) {
56    match policy {
57        ChangePolicy::Always => {
58            write_u8(hasher, 0);
59        }
60        ChangePolicy::Threshold { threshold_q } => {
61            write_u8(hasher, 1);
62            write_u32(hasher, threshold_q);
63        }
64    }
65}
66
67fn write_fixed_point(hasher: &mut Hasher, fp: FixedPoint) {
68    write_i64(hasher, fp.min_q);
69    write_i64(hasher, fp.max_q);
70    write_u32(hasher, fp.scale);
71}
72
73fn write_u8(hasher: &mut Hasher, value: u8) {
74    hasher.update(&[value]);
75}
76
77fn write_u16(hasher: &mut Hasher, value: u16) {
78    hasher.update(&value.to_le_bytes());
79}
80
81fn write_u32(hasher: &mut Hasher, value: u32) {
82    hasher.update(&value.to_le_bytes());
83}
84
85fn write_i64(hasher: &mut Hasher, value: i64) {
86    hasher.update(&value.to_le_bytes());
87}
88
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{ComponentDef, ComponentId, FieldCodec, FieldDef, FieldId, Schema};

    /// Shorthand for a component id; panics if `raw` is rejected.
    fn cid(raw: u16) -> ComponentId {
        ComponentId::new(raw).unwrap()
    }

    /// Shorthand for a field id; panics if `raw` is rejected.
    fn fid(raw: u16) -> FieldId {
        FieldId::new(raw).unwrap()
    }

    /// Wraps the given components in a schema; panics if validation fails.
    fn schema_of(components: Vec<ComponentDef>) -> Schema {
        Schema::new(components).unwrap()
    }

    /// Hashing the same schema twice gives the same value.
    #[test]
    fn schema_hash_is_stable() {
        let schema = schema_of(vec![ComponentDef::new(cid(1))
            .field(FieldDef::new(fid(1), FieldCodec::bool()))
            .field(FieldDef::with_threshold(fid(2), FieldCodec::uint(8), 2))]);

        let first = schema_hash(&schema);
        let second = schema_hash(&schema);
        assert_eq!(first, second);
    }

    /// Pins the hash of a fixed schema so accidental format changes are caught.
    #[test]
    fn schema_hash_golden() {
        let bool_field = FieldDef::new(fid(1), FieldCodec::bool());
        let sint_field = FieldDef::new(fid(2), FieldCodec::sint(12));
        let uint_field = FieldDef::with_threshold(fid(3), FieldCodec::uint(5), 3);
        let fixed_field = FieldDef::new(fid(4), FieldCodec::fixed_point(-500, 500, 100));

        let schema = schema_of(vec![ComponentDef::new(cid(10))
            .field(bool_field)
            .field(sint_field)
            .field(uint_field)
            .field(fixed_field)]);

        assert_eq!(schema_hash(&schema), 0x9320_BE45_8A81_5FCB);
    }

    /// Swapping two components changes the hash.
    #[test]
    fn schema_hash_changes_with_component_order() {
        let bool_component =
            ComponentDef::new(cid(1)).field(FieldDef::new(fid(1), FieldCodec::bool()));
        let uint_component =
            ComponentDef::new(cid(2)).field(FieldDef::new(fid(1), FieldCodec::uint(8)));

        let forward = schema_of(vec![bool_component.clone(), uint_component.clone()]);
        let reversed = schema_of(vec![uint_component, bool_component]);

        assert_ne!(schema_hash(&forward), schema_hash(&reversed));
    }

    /// Swapping two fields inside one component changes the hash.
    #[test]
    fn schema_hash_changes_with_field_order() {
        let bool_then_uint = ComponentDef::new(cid(1))
            .field(FieldDef::new(fid(1), FieldCodec::bool()))
            .field(FieldDef::new(fid(2), FieldCodec::uint(8)));
        let uint_then_bool = ComponentDef::new(cid(1))
            .field(FieldDef::new(fid(2), FieldCodec::uint(8)))
            .field(FieldDef::new(fid(1), FieldCodec::bool()));

        let forward = schema_of(vec![bool_then_uint]);
        let reversed = schema_of(vec![uint_then_bool]);

        assert_ne!(schema_hash(&forward), schema_hash(&reversed));
    }
}
155}