icydb_core/value/bytes.rs
1use crate::value::Value;
2use canic::utils::hash::Xxh3;
3
4///
5/// Canonical Byte Representation
6///
7
8#[inline]
9fn feed_i32(h: &mut Xxh3, x: i32) {
10 h.update(&x.to_be_bytes());
11}
12#[inline]
13fn feed_i64(h: &mut Xxh3, x: i64) {
14 h.update(&x.to_be_bytes());
15}
16#[inline]
17fn feed_i128(h: &mut Xxh3, x: i128) {
18 h.update(&x.to_be_bytes());
19}
20#[inline]
21fn feed_u8(h: &mut Xxh3, x: u8) {
22 h.update(&[x]);
23}
24#[inline]
25fn feed_u32(h: &mut Xxh3, x: u32) {
26 h.update(&x.to_be_bytes());
27}
28#[inline]
29fn feed_u64(h: &mut Xxh3, x: u64) {
30 h.update(&x.to_be_bytes());
31}
32#[inline]
33fn feed_u128(h: &mut Xxh3, x: u128) {
34 h.update(&x.to_be_bytes());
35}
36
37#[inline]
38fn feed_bytes(h: &mut Xxh3, b: &[u8]) {
39 h.update(b);
40}
41
42#[allow(clippy::cast_possible_truncation)]
43impl Value {
44 ///
45 /// Compute a **canonical, deterministic 128-bit fingerprint** of this `Value`.
46 ///
47 /// This is *not* the same as serializing the value (e.g. with CBOR or Serde) and hashing:
48 /// - CBOR is not canonical by default (ints can have multiple encodings, maps can reorder keys, NaN payloads differ, etc.).
49 /// - Rust's internal layout is not stable across versions or platforms.
50 ///
51 /// Instead, we define our own **canonical byte representation**:
52 /// - Prefix with a fixed `VERSION` byte to allow evolution of the format.
53 /// - Prefix with a `ValueTag` to distinguish enum variants (`Int(1)` vs `Uint(1)`).
54 /// - Encode each variant deterministically (e.g. Decimal as sign/scale/mantissa).
55 /// - Recurse through lists element-by-element in order.
56 ///
57 /// ### Why?
58 /// - **Stable across upgrades / canisters**: the same logical value always yields the same hash.
59 /// - **Indexing**: provides a fixed-size `[u8; 16]` fingerprint for use in secondary indexes
60 /// and fast equality lookups.
61 /// - **Canonicalization**: ensures semantically equal values hash identically, avoiding
62 /// “same value, different bytes” bugs.
63 ///
64 /// Use this in query planning, index scans, and anywhere you need a compact,
65 /// reproducible identity for a `Value`.
66 ///
67 fn write_to_hasher(&self, h: &mut Xxh3) {
68 feed_u8(h, self.tag());
69
70 match self {
71 Self::Account(a) => {
72 feed_bytes(h, &a.to_bytes());
73 }
74 Self::Blob(v) => {
75 feed_u8(h, 0x01);
76 feed_bytes(h, v);
77 }
78 Self::Bool(b) => {
79 feed_u8(h, u8::from(*b));
80 }
81 Self::Date(d) => feed_i32(h, d.get()),
82 Self::Decimal(d) => {
83 // encode (sign, scale, mantissa) deterministically:
84 feed_u8(h, u8::from(d.is_sign_negative()));
85 feed_u32(h, d.scale());
86 feed_bytes(h, &d.mantissa().to_be_bytes());
87 }
88 Self::Duration(t) => {
89 feed_u64(h, t.get());
90 }
91 Self::Enum(v) => {
92 feed_u32(h, v.path.len() as u32);
93 feed_bytes(h, v.path.as_bytes());
94
95 feed_u32(h, v.variant.len() as u32);
96 feed_bytes(h, v.variant.as_bytes());
97 }
98 Self::E8s(v) => {
99 feed_u64(h, v.get());
100 }
101 Self::E18s(v) => {
102 feed_bytes(h, &v.to_be_bytes());
103 }
104 Self::Float32(v) => {
105 feed_bytes(h, &v.to_be_bytes());
106 }
107 Self::Float64(v) => {
108 feed_bytes(h, &v.to_be_bytes());
109 }
110 Self::Int(i) => {
111 feed_i64(h, *i);
112 }
113 Self::Int128(i) => {
114 feed_i128(h, i.get());
115 }
116 Self::IntBig(v) => {
117 feed_bytes(h, &v.to_leb128());
118 }
119 Self::List(xs) => {
120 feed_u32(h, xs.len() as u32);
121 for x in xs {
122 feed_u8(h, 0xFF);
123 x.write_to_hasher(h); // recurse, no sub-hash
124 }
125 }
126 Self::Principal(p) => {
127 let raw = p.as_slice();
128 feed_u32(h, raw.len() as u32);
129 feed_bytes(h, raw);
130 }
131 Self::Subaccount(s) => {
132 feed_bytes(h, &s.to_bytes());
133 }
134 Self::Text(s) => {
135 // If you need case/Unicode insensitivity, normalize; else skip (much faster)
136 // let norm = normalize_nfkc_casefold(s);
137 // feed_u32( h, norm.len() as u32);
138 // feed_bytes( h, norm.as_bytes());
139 feed_u32(h, s.len() as u32);
140 feed_bytes(h, s.as_bytes());
141 }
142 Self::Timestamp(t) => {
143 feed_u64(h, t.get());
144 }
145 Self::Uint(u) => {
146 feed_u64(h, *u);
147 }
148 Self::Uint128(u) => {
149 feed_u128(h, u.get());
150 }
151 Self::UintBig(v) => {
152 feed_bytes(h, &v.to_leb128());
153 }
154 Self::Ulid(u) => {
155 feed_bytes(h, &u.to_bytes());
156 }
157 Self::None | Self::Unit | Self::Unsupported => {}
158 }
159 }
160
161 #[must_use]
162 pub fn hash_value(&self) -> [u8; 16] {
163 const VERSION: u8 = 1;
164
165 let mut h = Xxh3::with_seed(0);
166 feed_u8(&mut h, VERSION); // version
167
168 self.write_to_hasher(&mut h);
169 h.digest128().to_be_bytes()
170 }
171}