icydb_core/db/index/
fingerprint.rs

1use crate::value::Value;
2use canic_utils::hash::Xxh3;
3
4///
5/// ValueTag
6///
7/// Can we remove ValueTag?
8/// Yes, technically.
9///
10/// Should we?
11/// Almost certainly no, unless you control all serialization + don't need hashing + don't care about stability.
12///
13/// Why keep it?
14/// Binary stability, hashing, sorting, versioning, IC-safe ABI, robustness.
15///
16
17#[repr(u8)]
18#[derive(Clone, Copy, Debug, Eq, PartialEq)]
19pub enum ValueTag {
20    Account = 1,
21    Blob = 2,
22    Bool = 3,
23    Date = 4,
24    Decimal = 5,
25    Duration = 6,
26    Enum = 7,
27    E8s = 8,
28    E18s = 9,
29    Float32 = 10,
30    Float64 = 11,
31    Int = 12,
32    Int128 = 13,
33    IntBig = 14,
34    List = 15,
35    None = 16,
36    Principal = 17,
37    Subaccount = 18,
38    Text = 19,
39    Timestamp = 20,
40    Uint = 21,
41    Uint128 = 22,
42    UintBig = 23,
43    Ulid = 24,
44    Unit = 25,
45    Unsupported = 26,
46}
47
48impl ValueTag {
49    #[must_use]
50    pub const fn to_u8(self) -> u8 {
51        self as u8
52    }
53}
54
55///
56/// Canonical Byte Representation
57///
58
59const fn value_tag(value: &Value) -> u8 {
60    match value {
61        Value::Account(_) => ValueTag::Account,
62        Value::Blob(_) => ValueTag::Blob,
63        Value::Bool(_) => ValueTag::Bool,
64        Value::Date(_) => ValueTag::Date,
65        Value::Decimal(_) => ValueTag::Decimal,
66        Value::Duration(_) => ValueTag::Duration,
67        Value::Enum(_) => ValueTag::Enum,
68        Value::E8s(_) => ValueTag::E8s,
69        Value::E18s(_) => ValueTag::E18s,
70        Value::Float32(_) => ValueTag::Float32,
71        Value::Float64(_) => ValueTag::Float64,
72        Value::Int(_) => ValueTag::Int,
73        Value::Int128(_) => ValueTag::Int128,
74        Value::IntBig(_) => ValueTag::IntBig,
75        Value::List(_) => ValueTag::List,
76        Value::None => ValueTag::None,
77        Value::Principal(_) => ValueTag::Principal,
78        Value::Subaccount(_) => ValueTag::Subaccount,
79        Value::Text(_) => ValueTag::Text,
80        Value::Timestamp(_) => ValueTag::Timestamp,
81        Value::Uint(_) => ValueTag::Uint,
82        Value::Uint128(_) => ValueTag::Uint128,
83        Value::UintBig(_) => ValueTag::UintBig,
84        Value::Ulid(_) => ValueTag::Ulid,
85        Value::Unit => ValueTag::Unit,
86        Value::Unsupported => ValueTag::Unsupported,
87    }
88    .to_u8()
89}
90
91fn feed_i32(h: &mut Xxh3, x: i32) {
92    h.update(&x.to_be_bytes());
93}
94fn feed_i64(h: &mut Xxh3, x: i64) {
95    h.update(&x.to_be_bytes());
96}
97fn feed_i128(h: &mut Xxh3, x: i128) {
98    h.update(&x.to_be_bytes());
99}
100fn feed_u8(h: &mut Xxh3, x: u8) {
101    h.update(&[x]);
102}
103fn feed_u32(h: &mut Xxh3, x: u32) {
104    h.update(&x.to_be_bytes());
105}
106fn feed_u64(h: &mut Xxh3, x: u64) {
107    h.update(&x.to_be_bytes());
108}
109fn feed_u128(h: &mut Xxh3, x: u128) {
110    h.update(&x.to_be_bytes());
111}
112fn feed_bytes(h: &mut Xxh3, b: &[u8]) {
113    h.update(b);
114}
115
116#[allow(clippy::cast_possible_truncation)]
117fn write_to_hasher(value: &Value, h: &mut Xxh3) {
118    feed_u8(h, value_tag(value));
119
120    match value {
121        Value::Account(a) => {
122            feed_bytes(h, &a.to_bytes());
123        }
124        Value::Blob(v) => {
125            feed_u8(h, 0x01);
126            feed_bytes(h, v);
127        }
128        Value::Bool(b) => {
129            feed_u8(h, u8::from(*b));
130        }
131        Value::Date(d) => feed_i32(h, d.get()),
132        Value::Decimal(d) => {
133            // encode (sign, scale, mantissa) deterministically:
134            feed_u8(h, u8::from(d.is_sign_negative()));
135            feed_u32(h, d.scale());
136            feed_bytes(h, &d.mantissa().to_be_bytes());
137        }
138        Value::Duration(t) => {
139            feed_u64(h, t.get());
140        }
141        Value::Enum(v) => {
142            match &v.path {
143                Some(path) => {
144                    feed_u8(h, 0x01); // path present
145                    feed_u32(h, path.len() as u32);
146                    feed_bytes(h, path.as_bytes());
147                }
148                None => feed_u8(h, 0x00), // path absent -> loose match
149            }
150
151            feed_u32(h, v.variant.len() as u32);
152            feed_bytes(h, v.variant.as_bytes());
153
154            match &v.payload {
155                Some(payload) => {
156                    feed_u8(h, 0x01); // payload present
157                    write_to_hasher(payload, h); // include nested value
158                }
159                None => feed_u8(h, 0x00),
160            }
161        }
162        Value::E8s(v) => {
163            feed_u64(h, v.get());
164        }
165        Value::E18s(v) => {
166            feed_bytes(h, &v.to_be_bytes());
167        }
168        Value::Float32(v) => {
169            feed_bytes(h, &v.to_be_bytes());
170        }
171        Value::Float64(v) => {
172            feed_bytes(h, &v.to_be_bytes());
173        }
174        Value::Int(i) => {
175            feed_i64(h, *i);
176        }
177        Value::Int128(i) => {
178            feed_i128(h, i.get());
179        }
180        Value::IntBig(v) => {
181            feed_bytes(h, &v.to_leb128());
182        }
183        Value::List(xs) => {
184            feed_u32(h, xs.len() as u32);
185            for x in xs {
186                feed_u8(h, 0xFF);
187                write_to_hasher(x, h); // recurse, no sub-hash
188            }
189        }
190        Value::Principal(p) => {
191            let raw = p.as_slice();
192            feed_u32(h, raw.len() as u32);
193            feed_bytes(h, raw);
194        }
195        Value::Subaccount(s) => {
196            feed_bytes(h, &s.to_bytes());
197        }
198        Value::Text(s) => {
199            // If you need case/Unicode insensitivity, normalize; else skip (much faster)
200            // let norm = normalize_nfkc_casefold(s);
201            // feed_u32( h, norm.len() as u32);
202            // feed_bytes( h, norm.as_bytes());
203            feed_u32(h, s.len() as u32);
204            feed_bytes(h, s.as_bytes());
205        }
206        Value::Timestamp(t) => {
207            feed_u64(h, t.get());
208        }
209        Value::Uint(u) => {
210            feed_u64(h, *u);
211        }
212        Value::Uint128(u) => {
213            feed_u128(h, u.get());
214        }
215        Value::UintBig(v) => {
216            feed_bytes(h, &v.to_leb128());
217        }
218        Value::Ulid(u) => {
219            feed_bytes(h, &u.to_bytes());
220        }
221        Value::None | Value::Unit | Value::Unsupported => {}
222    }
223}
224
225#[must_use]
226/// Stable hash used for index/storage fingerprints.
227pub fn hash_value(value: &Value) -> [u8; 16] {
228    const VERSION: u8 = 1;
229
230    let mut h = Xxh3::with_seed(0);
231    feed_u8(&mut h, VERSION); // version
232
233    write_to_hasher(value, &mut h);
234    h.digest128().to_be_bytes()
235}
236
237#[must_use]
238/// Stable 128-bit hash used for index keys; returns `None` for non-indexable values.
239pub fn to_index_fingerprint(value: &Value) -> Option<[u8; 16]> {
240    match value {
241        Value::None | Value::Unsupported => None,
242        _ => Some(hash_value(value)),
243    }
244}
245
246///
247/// TESTS
248///
249
250#[cfg(test)]
251mod tests {
252    use super::*;
253    use crate::{
254        types::{Float32 as F32, Float64 as F64},
255        value::{Value, ValueEnum},
256    };
257
258    fn v_f64(x: f64) -> Value {
259        Value::Float64(F64::try_new(x).expect("finite f64"))
260    }
261    fn v_f32(x: f32) -> Value {
262        Value::Float32(F32::try_new(x).expect("finite f32"))
263    }
264    fn v_i(x: i64) -> Value {
265        Value::Int(x)
266    }
267    fn v_txt(s: &str) -> Value {
268        Value::Text(s.to_string())
269    }
270
271    #[test]
272    fn hash_is_deterministic_for_int() {
273        let v = Value::Int(42);
274        let a = hash_value(&v);
275        let b = hash_value(&v);
276        assert_eq!(a, b, "hash should be deterministic for same value");
277    }
278
279    #[test]
280    fn different_variants_produce_different_hashes() {
281        let a = hash_value(&Value::Int(5));
282        let b = hash_value(&Value::Uint(5));
283        assert_ne!(
284            a, b,
285            "Int(5) and Uint(5) must hash differently (different tag)"
286        );
287    }
288
289    #[test]
290    fn enum_hash_tracks_path_presence() {
291        let strict = Value::Enum(ValueEnum::new("A", Some("MyEnum")));
292        let loose = Value::Enum(ValueEnum::new("A", None));
293        assert_ne!(
294            hash_value(&strict),
295            hash_value(&loose),
296            "Enum hashes must differ when path is present vs absent"
297        );
298    }
299
300    #[test]
301    fn enum_hash_includes_payload() {
302        let base = ValueEnum::new("A", Some("MyEnum"));
303        let with_one = Value::Enum(base.clone().with_payload(Value::Uint(1)));
304        let with_two = Value::Enum(base.with_payload(Value::Uint(2)));
305
306        assert_ne!(
307            hash_value(&with_one),
308            hash_value(&with_two),
309            "Enum payload must influence hash/fingerprint"
310        );
311    }
312
313    #[test]
314    fn float32_and_float64_hash_differ() {
315        let a = hash_value(&v_f32(1.0));
316        let b = hash_value(&v_f64(1.0));
317        assert_ne!(
318            a, b,
319            "Float32 and Float64 must hash differently (different tag)"
320        );
321    }
322
323    #[test]
324    fn text_is_length_and_content_sensitive() {
325        let a = hash_value(&v_txt("foo"));
326        let b = hash_value(&v_txt("bar"));
327        assert_ne!(a, b, "different strings should hash differently");
328
329        let c = hash_value(&v_txt("foo"));
330        assert_eq!(a, c, "same string should hash the same");
331    }
332
333    #[test]
334    fn list_hash_is_order_sensitive() {
335        let l1 = Value::from_list(&[v_i(1), v_i(2)]);
336        let l2 = Value::from_list(&[v_i(2), v_i(1)]);
337        assert_ne!(
338            hash_value(&l1),
339            hash_value(&l2),
340            "list order should affect hash"
341        );
342    }
343
344    #[test]
345    fn list_hash_is_length_sensitive() {
346        let l1 = Value::from_list(&[v_i(1)]);
347        let l2 = Value::from_list(&[v_i(1), v_i(1)]);
348        assert_ne!(
349            hash_value(&l1),
350            hash_value(&l2),
351            "list length should affect hash"
352        );
353    }
354}