icydb_core/value/
bytes.rs

1use crate::value::Value;
2use canic_utils::hash::Xxh3;
3
///
/// ValueTag
///
/// Stable one-byte discriminant for each `Value` variant.
///
/// `Value::tag` maps every variant to one of these tags, and that byte is
/// written at the start of each value's canonical encoding when it is hashed.
/// The numeric assignments are therefore part of the hash format: renumbering
/// or reusing a discriminant changes the fingerprint of every affected value.
///
/// The enum could technically be removed, but it is kept deliberately for
/// binary stability, hashing, sorting, versioning, an IC-safe ABI and
/// robustness.
///
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum ValueTag {
    // Discriminants are explicit on purpose; never rely on declaration order.
    Account = 1,
    Blob = 2,
    Bool = 3,
    Date = 4,
    Decimal = 5,
    Duration = 6,
    Enum = 7,
    E8s = 8,
    E18s = 9,
    Float32 = 10,
    Float64 = 11,
    Int = 12,
    Int128 = 13,
    IntBig = 14,
    List = 15,
    None = 16,
    Principal = 17,
    Subaccount = 18,
    Text = 19,
    Timestamp = 20,
    Uint = 21,
    Uint128 = 22,
    UintBig = 23,
    Ulid = 24,
    Unit = 25,
    Unsupported = 26,
}

impl ValueTag {
    /// Returns the tag's explicit `repr(u8)` discriminant.
    ///
    /// Usable in `const` contexts.
    #[must_use]
    pub const fn to_u8(self) -> u8 {
        self as u8
    }
}
54
55impl Value {
56    ///
57    /// HASHING
58    ///
59
60    #[must_use]
61    pub const fn tag(&self) -> u8 {
62        match self {
63            Self::Account(_) => ValueTag::Account,
64            Self::Blob(_) => ValueTag::Blob,
65            Self::Bool(_) => ValueTag::Bool,
66            Self::Date(_) => ValueTag::Date,
67            Self::Decimal(_) => ValueTag::Decimal,
68            Self::Duration(_) => ValueTag::Duration,
69            Self::Enum(_) => ValueTag::Enum,
70            Self::E8s(_) => ValueTag::E8s,
71            Self::E18s(_) => ValueTag::E18s,
72            Self::Float32(_) => ValueTag::Float32,
73            Self::Float64(_) => ValueTag::Float64,
74            Self::Int(_) => ValueTag::Int,
75            Self::Int128(_) => ValueTag::Int128,
76            Self::IntBig(_) => ValueTag::IntBig,
77            Self::List(_) => ValueTag::List,
78            Self::None => ValueTag::None,
79            Self::Principal(_) => ValueTag::Principal,
80            Self::Subaccount(_) => ValueTag::Subaccount,
81            Self::Text(_) => ValueTag::Text,
82            Self::Timestamp(_) => ValueTag::Timestamp,
83            Self::Uint(_) => ValueTag::Uint,
84            Self::Uint128(_) => ValueTag::Uint128,
85            Self::UintBig(_) => ValueTag::UintBig,
86            Self::Ulid(_) => ValueTag::Ulid,
87            Self::Unit => ValueTag::Unit,
88            Self::Unsupported => ValueTag::Unsupported,
89        }
90        .to_u8()
91    }
92}
93
94///
95/// Canonical Byte Representation
96///
97
98fn feed_i32(h: &mut Xxh3, x: i32) {
99    h.update(&x.to_be_bytes());
100}
101fn feed_i64(h: &mut Xxh3, x: i64) {
102    h.update(&x.to_be_bytes());
103}
104fn feed_i128(h: &mut Xxh3, x: i128) {
105    h.update(&x.to_be_bytes());
106}
107fn feed_u8(h: &mut Xxh3, x: u8) {
108    h.update(&[x]);
109}
110fn feed_u32(h: &mut Xxh3, x: u32) {
111    h.update(&x.to_be_bytes());
112}
113fn feed_u64(h: &mut Xxh3, x: u64) {
114    h.update(&x.to_be_bytes());
115}
116fn feed_u128(h: &mut Xxh3, x: u128) {
117    h.update(&x.to_be_bytes());
118}
119fn feed_bytes(h: &mut Xxh3, b: &[u8]) {
120    h.update(b);
121}
122
123#[allow(clippy::cast_possible_truncation)]
124impl Value {
125    ///
126    /// Compute a **canonical, deterministic 128-bit fingerprint** of this `Value`.
127    ///
128    /// This is *not* the same as serializing the value (e.g. with CBOR or Serde) and hashing:
129    /// - CBOR is not canonical by default (ints can have multiple encodings, maps can reorder keys, NaN payloads differ, etc.).
130    /// - Rust's internal layout is not stable across versions or platforms.
131    ///
132    /// Instead, we define our own **canonical byte representation**:
133    /// - Prefix with a fixed `VERSION` byte to allow evolution of the format.
134    /// - Prefix with a `ValueTag` to distinguish enum variants (`Int(1)` vs `Uint(1)`).
135    /// - Encode each variant deterministically (e.g. Decimal as sign/scale/mantissa).
136    /// - Recurse through lists element-by-element in order.
137    ///
138    /// ### Why?
139    /// - **Stable across upgrades / canisters**: the same logical value always yields the same hash.
140    /// - **Indexing**: provides a fixed-size `[u8; 16]` fingerprint for use in secondary indexes
141    ///   and fast equality lookups.
142    /// - **Canonicalization**: ensures semantically equal values hash identically, avoiding
143    ///   “same value, different bytes” bugs.
144    ///
145    /// Use this in query planning, index scans, and anywhere you need a compact,
146    /// reproducible identity for a `Value`.
147    ///
148    fn write_to_hasher(&self, h: &mut Xxh3) {
149        feed_u8(h, self.tag());
150
151        match self {
152            Self::Account(a) => {
153                feed_bytes(h, &a.to_bytes());
154            }
155            Self::Blob(v) => {
156                feed_u8(h, 0x01);
157                feed_bytes(h, v);
158            }
159            Self::Bool(b) => {
160                feed_u8(h, u8::from(*b));
161            }
162            Self::Date(d) => feed_i32(h, d.get()),
163            Self::Decimal(d) => {
164                // encode (sign, scale, mantissa) deterministically:
165                feed_u8(h, u8::from(d.is_sign_negative()));
166                feed_u32(h, d.scale());
167                feed_bytes(h, &d.mantissa().to_be_bytes());
168            }
169            Self::Duration(t) => {
170                feed_u64(h, t.get());
171            }
172            Self::Enum(v) => {
173                match &v.path {
174                    Some(path) => {
175                        feed_u8(h, 0x01); // path present
176                        feed_u32(h, path.len() as u32);
177                        feed_bytes(h, path.as_bytes());
178                    }
179                    None => feed_u8(h, 0x00), // path absent → loose match
180                }
181
182                feed_u32(h, v.variant.len() as u32);
183                feed_bytes(h, v.variant.as_bytes());
184
185                match &v.payload {
186                    Some(payload) => {
187                        feed_u8(h, 0x01); // payload present
188                        payload.write_to_hasher(h); // include nested value
189                    }
190                    None => feed_u8(h, 0x00),
191                }
192            }
193            Self::E8s(v) => {
194                feed_u64(h, v.get());
195            }
196            Self::E18s(v) => {
197                feed_bytes(h, &v.to_be_bytes());
198            }
199            Self::Float32(v) => {
200                feed_bytes(h, &v.to_be_bytes());
201            }
202            Self::Float64(v) => {
203                feed_bytes(h, &v.to_be_bytes());
204            }
205            Self::Int(i) => {
206                feed_i64(h, *i);
207            }
208            Self::Int128(i) => {
209                feed_i128(h, i.get());
210            }
211            Self::IntBig(v) => {
212                feed_bytes(h, &v.to_leb128());
213            }
214            Self::List(xs) => {
215                feed_u32(h, xs.len() as u32);
216                for x in xs {
217                    feed_u8(h, 0xFF);
218                    x.write_to_hasher(h); // recurse, no sub-hash
219                }
220            }
221            Self::Principal(p) => {
222                let raw = p.as_slice();
223                feed_u32(h, raw.len() as u32);
224                feed_bytes(h, raw);
225            }
226            Self::Subaccount(s) => {
227                feed_bytes(h, &s.to_bytes());
228            }
229            Self::Text(s) => {
230                // If you need case/Unicode insensitivity, normalize; else skip (much faster)
231                // let norm = normalize_nfkc_casefold(s);
232                // feed_u32( h, norm.len() as u32);
233                // feed_bytes( h, norm.as_bytes());
234                feed_u32(h, s.len() as u32);
235                feed_bytes(h, s.as_bytes());
236            }
237            Self::Timestamp(t) => {
238                feed_u64(h, t.get());
239            }
240            Self::Uint(u) => {
241                feed_u64(h, *u);
242            }
243            Self::Uint128(u) => {
244                feed_u128(h, u.get());
245            }
246            Self::UintBig(v) => {
247                feed_bytes(h, &v.to_leb128());
248            }
249            Self::Ulid(u) => {
250                feed_bytes(h, &u.to_bytes());
251            }
252            Self::None | Self::Unit | Self::Unsupported => {}
253        }
254    }
255
256    #[must_use]
257    /// Stable hash used for index/storage fingerprints.
258    pub fn hash_value(&self) -> [u8; 16] {
259        const VERSION: u8 = 1;
260
261        let mut h = Xxh3::with_seed(0);
262        feed_u8(&mut h, VERSION); // version
263
264        self.write_to_hasher(&mut h);
265        h.digest128().to_be_bytes()
266    }
267}