icydb_core/value/
bytes.rs

1use crate::value::Value;
2use canic::utils::hash::Xxh3;
3
///
/// ValueTag
///
/// One-byte discriminant identifying each `Value` variant in the canonical
/// byte encoding.
///
/// `Value::tag` maps every variant to one of these tags, and the hashing code
/// feeds that byte ahead of the variant's payload. The numbers below are
/// therefore part of the fingerprint format: renumbering or reusing a
/// discriminant changes the hash of every value of that variant.
///
/// Could the tag be removed? Technically yes. It should almost certainly stay,
/// unless all serialization is under our control, hashing is not needed, and
/// stability does not matter. It is kept for binary stability, hashing,
/// sorting, versioning, an IC-safe ABI, and robustness.
///
#[repr(u8)]
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum ValueTag {
    Account = 1,
    Blob = 2,
    Bool = 3,
    Date = 4,
    Decimal = 5,
    Duration = 6,
    Enum = 7,
    E8s = 8,
    E18s = 9,
    Float32 = 10,
    Float64 = 11,
    Int = 12,
    Int128 = 13,
    IntBig = 14,
    List = 15,
    None = 16,
    Principal = 17,
    Subaccount = 18,
    Text = 19,
    Timestamp = 20,
    Uint = 21,
    Uint128 = 22,
    UintBig = 23,
    Ulid = 24,
    Unit = 25,
    Unsupported = 26,
}
47
48impl ValueTag {
49    #[must_use]
50    pub const fn to_u8(self) -> u8 {
51        self as u8
52    }
53}
54
55impl Value {
56    ///
57    /// HASHING
58    ///
59
60    #[must_use]
61    pub const fn tag(&self) -> u8 {
62        match self {
63            Self::Account(_) => ValueTag::Account,
64            Self::Blob(_) => ValueTag::Blob,
65            Self::Bool(_) => ValueTag::Bool,
66            Self::Date(_) => ValueTag::Date,
67            Self::Decimal(_) => ValueTag::Decimal,
68            Self::Duration(_) => ValueTag::Duration,
69            Self::Enum(_) => ValueTag::Enum,
70            Self::E8s(_) => ValueTag::E8s,
71            Self::E18s(_) => ValueTag::E18s,
72            Self::Float32(_) => ValueTag::Float32,
73            Self::Float64(_) => ValueTag::Float64,
74            Self::Int(_) => ValueTag::Int,
75            Self::Int128(_) => ValueTag::Int128,
76            Self::IntBig(_) => ValueTag::IntBig,
77            Self::List(_) => ValueTag::List,
78            Self::None => ValueTag::None,
79            Self::Principal(_) => ValueTag::Principal,
80            Self::Subaccount(_) => ValueTag::Subaccount,
81            Self::Text(_) => ValueTag::Text,
82            Self::Timestamp(_) => ValueTag::Timestamp,
83            Self::Uint(_) => ValueTag::Uint,
84            Self::Uint128(_) => ValueTag::Uint128,
85            Self::UintBig(_) => ValueTag::UintBig,
86            Self::Ulid(_) => ValueTag::Ulid,
87            Self::Unit => ValueTag::Unit,
88            Self::Unsupported => ValueTag::Unsupported,
89        }
90        .to_u8()
91    }
92}
93
94///
95/// Canonical Byte Representation
96///
97
98#[inline]
99fn feed_i32(h: &mut Xxh3, x: i32) {
100    h.update(&x.to_be_bytes());
101}
102#[inline]
103fn feed_i64(h: &mut Xxh3, x: i64) {
104    h.update(&x.to_be_bytes());
105}
106#[inline]
107fn feed_i128(h: &mut Xxh3, x: i128) {
108    h.update(&x.to_be_bytes());
109}
110#[inline]
111fn feed_u8(h: &mut Xxh3, x: u8) {
112    h.update(&[x]);
113}
114#[inline]
115fn feed_u32(h: &mut Xxh3, x: u32) {
116    h.update(&x.to_be_bytes());
117}
118#[inline]
119fn feed_u64(h: &mut Xxh3, x: u64) {
120    h.update(&x.to_be_bytes());
121}
122#[inline]
123fn feed_u128(h: &mut Xxh3, x: u128) {
124    h.update(&x.to_be_bytes());
125}
126
127#[inline]
128fn feed_bytes(h: &mut Xxh3, b: &[u8]) {
129    h.update(b);
130}
131
132#[allow(clippy::cast_possible_truncation)]
133impl Value {
134    ///
135    /// Compute a **canonical, deterministic 128-bit fingerprint** of this `Value`.
136    ///
137    /// This is *not* the same as serializing the value (e.g. with CBOR or Serde) and hashing:
138    /// - CBOR is not canonical by default (ints can have multiple encodings, maps can reorder keys, NaN payloads differ, etc.).
139    /// - Rust's internal layout is not stable across versions or platforms.
140    ///
141    /// Instead, we define our own **canonical byte representation**:
142    /// - Prefix with a fixed `VERSION` byte to allow evolution of the format.
143    /// - Prefix with a `ValueTag` to distinguish enum variants (`Int(1)` vs `Uint(1)`).
144    /// - Encode each variant deterministically (e.g. Decimal as sign/scale/mantissa).
145    /// - Recurse through lists element-by-element in order.
146    ///
147    /// ### Why?
148    /// - **Stable across upgrades / canisters**: the same logical value always yields the same hash.
149    /// - **Indexing**: provides a fixed-size `[u8; 16]` fingerprint for use in secondary indexes
150    ///   and fast equality lookups.
151    /// - **Canonicalization**: ensures semantically equal values hash identically, avoiding
152    ///   “same value, different bytes” bugs.
153    ///
154    /// Use this in query planning, index scans, and anywhere you need a compact,
155    /// reproducible identity for a `Value`.
156    ///
157    fn write_to_hasher(&self, h: &mut Xxh3) {
158        feed_u8(h, self.tag());
159
160        match self {
161            Self::Account(a) => {
162                feed_bytes(h, &a.to_bytes());
163            }
164            Self::Blob(v) => {
165                feed_u8(h, 0x01);
166                feed_bytes(h, v);
167            }
168            Self::Bool(b) => {
169                feed_u8(h, u8::from(*b));
170            }
171            Self::Date(d) => feed_i32(h, d.get()),
172            Self::Decimal(d) => {
173                // encode (sign, scale, mantissa) deterministically:
174                feed_u8(h, u8::from(d.is_sign_negative()));
175                feed_u32(h, d.scale());
176                feed_bytes(h, &d.mantissa().to_be_bytes());
177            }
178            Self::Duration(t) => {
179                feed_u64(h, t.get());
180            }
181            Self::Enum(v) => {
182                match &v.path {
183                    Some(path) => {
184                        feed_u8(h, 0x01); // path present
185                        feed_u32(h, path.len() as u32);
186                        feed_bytes(h, path.as_bytes());
187                    }
188                    None => feed_u8(h, 0x00), // path absent → loose match
189                }
190
191                feed_u32(h, v.variant.len() as u32);
192                feed_bytes(h, v.variant.as_bytes());
193            }
194            Self::E8s(v) => {
195                feed_u64(h, v.get());
196            }
197            Self::E18s(v) => {
198                feed_bytes(h, &v.to_be_bytes());
199            }
200            Self::Float32(v) => {
201                feed_bytes(h, &v.to_be_bytes());
202            }
203            Self::Float64(v) => {
204                feed_bytes(h, &v.to_be_bytes());
205            }
206            Self::Int(i) => {
207                feed_i64(h, *i);
208            }
209            Self::Int128(i) => {
210                feed_i128(h, i.get());
211            }
212            Self::IntBig(v) => {
213                feed_bytes(h, &v.to_leb128());
214            }
215            Self::List(xs) => {
216                feed_u32(h, xs.len() as u32);
217                for x in xs {
218                    feed_u8(h, 0xFF);
219                    x.write_to_hasher(h); // recurse, no sub-hash
220                }
221            }
222            Self::Principal(p) => {
223                let raw = p.as_slice();
224                feed_u32(h, raw.len() as u32);
225                feed_bytes(h, raw);
226            }
227            Self::Subaccount(s) => {
228                feed_bytes(h, &s.to_bytes());
229            }
230            Self::Text(s) => {
231                // If you need case/Unicode insensitivity, normalize; else skip (much faster)
232                // let norm = normalize_nfkc_casefold(s);
233                // feed_u32( h, norm.len() as u32);
234                // feed_bytes( h, norm.as_bytes());
235                feed_u32(h, s.len() as u32);
236                feed_bytes(h, s.as_bytes());
237            }
238            Self::Timestamp(t) => {
239                feed_u64(h, t.get());
240            }
241            Self::Uint(u) => {
242                feed_u64(h, *u);
243            }
244            Self::Uint128(u) => {
245                feed_u128(h, u.get());
246            }
247            Self::UintBig(v) => {
248                feed_bytes(h, &v.to_leb128());
249            }
250            Self::Ulid(u) => {
251                feed_bytes(h, &u.to_bytes());
252            }
253            Self::None | Self::Unit | Self::Unsupported => {}
254        }
255    }
256
257    #[must_use]
258    pub fn hash_value(&self) -> [u8; 16] {
259        const VERSION: u8 = 1;
260
261        let mut h = Xxh3::with_seed(0);
262        feed_u8(&mut h, VERSION); // version
263
264        self.write_to_hasher(&mut h);
265        h.digest128().to_be_bytes()
266    }
267}