icydb_core/value/
bytes.rs

1use crate::value::Value;
2use canic_utils::hash::Xxh3;
3
///
/// ValueTag
///
/// One-byte discriminant identifying a `Value` variant.
///
/// `Value::tag` maps each variant to the tag of the same name, and
/// `Value::write_to_hasher` feeds that byte into the hasher ahead of the
/// payload, so two variants carrying identical payload bytes still produce
/// different hash input (e.g. `Int(1)` vs `Uint(1)`).
///
/// Every discriminant is spelled out and `#[repr(u8)]` fixes the width to one
/// byte, so the numeric values do not depend on declaration order.
/// Values start at 1; 0 is not assigned.
///
/// NOTE(review): because these bytes feed `Value::hash_value`, renumbering a
/// variant would presumably change fingerprints that are already stored —
/// give new variants fresh numbers instead of reusing or shifting existing ones.
///
/// Can we remove ValueTag?
/// Yes, technically.
///
/// Should we?
/// Almost certainly not, unless you control all serialization, don't need
/// hashing, and don't care about stability.
///
/// Why keep it?
/// Binary stability, hashing, sorting, versioning, IC-safe ABI, robustness.
///

#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ValueTag {
    Account = 1,
    Blob = 2,
    Bool = 3,
    Date = 4,
    Decimal = 5,
    Duration = 6,
    Enum = 7,
    E8s = 8,
    E18s = 9,
    Float32 = 10,
    Float64 = 11,
    Int = 12,
    Int128 = 13,
    IntBig = 14,
    List = 15,
    None = 16,
    Principal = 17,
    Subaccount = 18,
    Text = 19,
    Timestamp = 20,
    Uint = 21,
    Uint128 = 22,
    UintBig = 23,
    Ulid = 24,
    Unit = 25,
    Unsupported = 26,
}
47
impl ValueTag {
    /// Returns this tag's explicit discriminant as a raw byte.
    ///
    /// `const` so that it can be called from other `const fn`s such as `Value::tag`.
    #[must_use]
    pub const fn to_u8(self) -> u8 {
        // The enum is `#[repr(u8)]`, so the cast yields the declared value unchanged.
        self as u8
    }
}
54
55impl Value {
56    ///
57    /// HASHING
58    ///
59
60    #[must_use]
61    pub const fn tag(&self) -> u8 {
62        match self {
63            Self::Account(_) => ValueTag::Account,
64            Self::Blob(_) => ValueTag::Blob,
65            Self::Bool(_) => ValueTag::Bool,
66            Self::Date(_) => ValueTag::Date,
67            Self::Decimal(_) => ValueTag::Decimal,
68            Self::Duration(_) => ValueTag::Duration,
69            Self::Enum(_) => ValueTag::Enum,
70            Self::E8s(_) => ValueTag::E8s,
71            Self::E18s(_) => ValueTag::E18s,
72            Self::Float32(_) => ValueTag::Float32,
73            Self::Float64(_) => ValueTag::Float64,
74            Self::Int(_) => ValueTag::Int,
75            Self::Int128(_) => ValueTag::Int128,
76            Self::IntBig(_) => ValueTag::IntBig,
77            Self::List(_) => ValueTag::List,
78            Self::None => ValueTag::None,
79            Self::Principal(_) => ValueTag::Principal,
80            Self::Subaccount(_) => ValueTag::Subaccount,
81            Self::Text(_) => ValueTag::Text,
82            Self::Timestamp(_) => ValueTag::Timestamp,
83            Self::Uint(_) => ValueTag::Uint,
84            Self::Uint128(_) => ValueTag::Uint128,
85            Self::UintBig(_) => ValueTag::UintBig,
86            Self::Ulid(_) => ValueTag::Ulid,
87            Self::Unit => ValueTag::Unit,
88            Self::Unsupported => ValueTag::Unsupported,
89        }
90        .to_u8()
91    }
92}
93
94///
95/// Canonical Byte Representation
96///
97
98#[inline]
99fn feed_i32(h: &mut Xxh3, x: i32) {
100    h.update(&x.to_be_bytes());
101}
102#[inline]
103fn feed_i64(h: &mut Xxh3, x: i64) {
104    h.update(&x.to_be_bytes());
105}
106#[inline]
107fn feed_i128(h: &mut Xxh3, x: i128) {
108    h.update(&x.to_be_bytes());
109}
110#[inline]
111fn feed_u8(h: &mut Xxh3, x: u8) {
112    h.update(&[x]);
113}
114#[inline]
115fn feed_u32(h: &mut Xxh3, x: u32) {
116    h.update(&x.to_be_bytes());
117}
118#[inline]
119fn feed_u64(h: &mut Xxh3, x: u64) {
120    h.update(&x.to_be_bytes());
121}
122#[inline]
123fn feed_u128(h: &mut Xxh3, x: u128) {
124    h.update(&x.to_be_bytes());
125}
126
/// Feeds the slice `b` into the hasher exactly as given.
///
/// No length prefix or terminator is added; callers that need framing must
/// write it themselves.
#[inline]
fn feed_bytes(h: &mut Xxh3, b: &[u8]) {
    h.update(b);
}
131
132#[allow(clippy::cast_possible_truncation)]
133impl Value {
134    ///
135    /// Compute a **canonical, deterministic 128-bit fingerprint** of this `Value`.
136    ///
137    /// This is *not* the same as serializing the value (e.g. with CBOR or Serde) and hashing:
138    /// - CBOR is not canonical by default (ints can have multiple encodings, maps can reorder keys, NaN payloads differ, etc.).
139    /// - Rust's internal layout is not stable across versions or platforms.
140    ///
141    /// Instead, we define our own **canonical byte representation**:
142    /// - Prefix with a fixed `VERSION` byte to allow evolution of the format.
143    /// - Prefix with a `ValueTag` to distinguish enum variants (`Int(1)` vs `Uint(1)`).
144    /// - Encode each variant deterministically (e.g. Decimal as sign/scale/mantissa).
145    /// - Recurse through lists element-by-element in order.
146    ///
147    /// ### Why?
148    /// - **Stable across upgrades / canisters**: the same logical value always yields the same hash.
149    /// - **Indexing**: provides a fixed-size `[u8; 16]` fingerprint for use in secondary indexes
150    ///   and fast equality lookups.
151    /// - **Canonicalization**: ensures semantically equal values hash identically, avoiding
152    ///   “same value, different bytes” bugs.
153    ///
154    /// Use this in query planning, index scans, and anywhere you need a compact,
155    /// reproducible identity for a `Value`.
156    ///
157    fn write_to_hasher(&self, h: &mut Xxh3) {
158        feed_u8(h, self.tag());
159
160        match self {
161            Self::Account(a) => {
162                feed_bytes(h, &a.to_bytes());
163            }
164            Self::Blob(v) => {
165                feed_u8(h, 0x01);
166                feed_bytes(h, v);
167            }
168            Self::Bool(b) => {
169                feed_u8(h, u8::from(*b));
170            }
171            Self::Date(d) => feed_i32(h, d.get()),
172            Self::Decimal(d) => {
173                // encode (sign, scale, mantissa) deterministically:
174                feed_u8(h, u8::from(d.is_sign_negative()));
175                feed_u32(h, d.scale());
176                feed_bytes(h, &d.mantissa().to_be_bytes());
177            }
178            Self::Duration(t) => {
179                feed_u64(h, t.get());
180            }
181            Self::Enum(v) => {
182                match &v.path {
183                    Some(path) => {
184                        feed_u8(h, 0x01); // path present
185                        feed_u32(h, path.len() as u32);
186                        feed_bytes(h, path.as_bytes());
187                    }
188                    None => feed_u8(h, 0x00), // path absent → loose match
189                }
190
191                feed_u32(h, v.variant.len() as u32);
192                feed_bytes(h, v.variant.as_bytes());
193
194                match &v.payload {
195                    Some(payload) => {
196                        feed_u8(h, 0x01); // payload present
197                        payload.write_to_hasher(h); // include nested value
198                    }
199                    None => feed_u8(h, 0x00),
200                }
201            }
202            Self::E8s(v) => {
203                feed_u64(h, v.get());
204            }
205            Self::E18s(v) => {
206                feed_bytes(h, &v.to_be_bytes());
207            }
208            Self::Float32(v) => {
209                feed_bytes(h, &v.to_be_bytes());
210            }
211            Self::Float64(v) => {
212                feed_bytes(h, &v.to_be_bytes());
213            }
214            Self::Int(i) => {
215                feed_i64(h, *i);
216            }
217            Self::Int128(i) => {
218                feed_i128(h, i.get());
219            }
220            Self::IntBig(v) => {
221                feed_bytes(h, &v.to_leb128());
222            }
223            Self::List(xs) => {
224                feed_u32(h, xs.len() as u32);
225                for x in xs {
226                    feed_u8(h, 0xFF);
227                    x.write_to_hasher(h); // recurse, no sub-hash
228                }
229            }
230            Self::Principal(p) => {
231                let raw = p.as_slice();
232                feed_u32(h, raw.len() as u32);
233                feed_bytes(h, raw);
234            }
235            Self::Subaccount(s) => {
236                feed_bytes(h, &s.to_bytes());
237            }
238            Self::Text(s) => {
239                // If you need case/Unicode insensitivity, normalize; else skip (much faster)
240                // let norm = normalize_nfkc_casefold(s);
241                // feed_u32( h, norm.len() as u32);
242                // feed_bytes( h, norm.as_bytes());
243                feed_u32(h, s.len() as u32);
244                feed_bytes(h, s.as_bytes());
245            }
246            Self::Timestamp(t) => {
247                feed_u64(h, t.get());
248            }
249            Self::Uint(u) => {
250                feed_u64(h, *u);
251            }
252            Self::Uint128(u) => {
253                feed_u128(h, u.get());
254            }
255            Self::UintBig(v) => {
256                feed_bytes(h, &v.to_leb128());
257            }
258            Self::Ulid(u) => {
259                feed_bytes(h, &u.to_bytes());
260            }
261            Self::None | Self::Unit | Self::Unsupported => {}
262        }
263    }
264
265    #[must_use]
266    /// Stable hash used for index/storage fingerprints.
267    pub fn hash_value(&self) -> [u8; 16] {
268        const VERSION: u8 = 1;
269
270        let mut h = Xxh3::with_seed(0);
271        feed_u8(&mut h, VERSION); // version
272
273        self.write_to_hasher(&mut h);
274        h.digest128().to_be_bytes()
275    }
276}