icydb_core/value/
bytes.rs

1use crate::value::Value;
2use canic::utils::hash::Xxh3;
3
4///
5/// ValueTag
6///
7
/// One-byte discriminant identifying a `Value` variant.
///
/// `Value::tag` maps each variant to one of these and the resulting byte is fed
/// into the canonical hash ahead of the variant payload, so the numeric values
/// are part of the hash format: never renumber or reuse an existing value, only
/// append new ones.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValueTag {
    Account = 1,
    Blob = 2,
    Bool = 3,
    Date = 4,
    Decimal = 5,
    Duration = 6,
    Enum = 7,
    E8s = 8,
    E18s = 9,
    Float32 = 10,
    Float64 = 11,
    Int = 12,
    Int128 = 13,
    IntBig = 14,
    List = 15,
    None = 16,
    Principal = 17,
    Subaccount = 18,
    Text = 19,
    Timestamp = 20,
    Uint = 21,
    Uint128 = 22,
    UintBig = 23,
    Ulid = 24,
    Unit = 25,
    Unsupported = 26,
}

impl ValueTag {
    /// Returns the explicit `repr(u8)` discriminant assigned to this tag.
    ///
    /// Usable in `const` contexts.
    #[must_use]
    pub const fn to_u8(self) -> u8 {
        self as u8
    }
}
45
46impl Value {
47    ///
48    /// HASHING
49    ///
50
51    #[must_use]
52    pub const fn tag(&self) -> u8 {
53        match self {
54            Self::Account(_) => ValueTag::Account,
55            Self::Blob(_) => ValueTag::Blob,
56            Self::Bool(_) => ValueTag::Bool,
57            Self::Date(_) => ValueTag::Date,
58            Self::Decimal(_) => ValueTag::Decimal,
59            Self::Duration(_) => ValueTag::Duration,
60            Self::Enum(_) => ValueTag::Enum,
61            Self::E8s(_) => ValueTag::E8s,
62            Self::E18s(_) => ValueTag::E18s,
63            Self::Float32(_) => ValueTag::Float32,
64            Self::Float64(_) => ValueTag::Float64,
65            Self::Int(_) => ValueTag::Int,
66            Self::Int128(_) => ValueTag::Int128,
67            Self::IntBig(_) => ValueTag::IntBig,
68            Self::List(_) => ValueTag::List,
69            Self::None => ValueTag::None,
70            Self::Principal(_) => ValueTag::Principal,
71            Self::Subaccount(_) => ValueTag::Subaccount,
72            Self::Text(_) => ValueTag::Text,
73            Self::Timestamp(_) => ValueTag::Timestamp,
74            Self::Uint(_) => ValueTag::Uint,
75            Self::Uint128(_) => ValueTag::Uint128,
76            Self::UintBig(_) => ValueTag::UintBig,
77            Self::Ulid(_) => ValueTag::Ulid,
78            Self::Unit => ValueTag::Unit,
79            Self::Unsupported => ValueTag::Unsupported,
80        }
81        .to_u8()
82    }
83}
84
85///
86/// Canonical Byte Representation
87///
88
89#[inline]
90fn feed_i32(h: &mut Xxh3, x: i32) {
91    h.update(&x.to_be_bytes());
92}
93#[inline]
94fn feed_i64(h: &mut Xxh3, x: i64) {
95    h.update(&x.to_be_bytes());
96}
97#[inline]
98fn feed_i128(h: &mut Xxh3, x: i128) {
99    h.update(&x.to_be_bytes());
100}
101#[inline]
102fn feed_u8(h: &mut Xxh3, x: u8) {
103    h.update(&[x]);
104}
105#[inline]
106fn feed_u32(h: &mut Xxh3, x: u32) {
107    h.update(&x.to_be_bytes());
108}
109#[inline]
110fn feed_u64(h: &mut Xxh3, x: u64) {
111    h.update(&x.to_be_bytes());
112}
113#[inline]
114fn feed_u128(h: &mut Xxh3, x: u128) {
115    h.update(&x.to_be_bytes());
116}
117
118#[inline]
119fn feed_bytes(h: &mut Xxh3, b: &[u8]) {
120    h.update(b);
121}
122
123#[allow(clippy::cast_possible_truncation)]
124impl Value {
125    ///
126    /// Compute a **canonical, deterministic 128-bit fingerprint** of this `Value`.
127    ///
128    /// This is *not* the same as serializing the value (e.g. with CBOR or Serde) and hashing:
129    /// - CBOR is not canonical by default (ints can have multiple encodings, maps can reorder keys, NaN payloads differ, etc.).
130    /// - Rust's internal layout is not stable across versions or platforms.
131    ///
132    /// Instead, we define our own **canonical byte representation**:
133    /// - Prefix with a fixed `VERSION` byte to allow evolution of the format.
134    /// - Prefix with a `ValueTag` to distinguish enum variants (`Int(1)` vs `Uint(1)`).
135    /// - Encode each variant deterministically (e.g. Decimal as sign/scale/mantissa).
136    /// - Recurse through lists element-by-element in order.
137    ///
138    /// ### Why?
139    /// - **Stable across upgrades / canisters**: the same logical value always yields the same hash.
140    /// - **Indexing**: provides a fixed-size `[u8; 16]` fingerprint for use in secondary indexes
141    ///   and fast equality lookups.
142    /// - **Canonicalization**: ensures semantically equal values hash identically, avoiding
143    ///   “same value, different bytes” bugs.
144    ///
145    /// Use this in query planning, index scans, and anywhere you need a compact,
146    /// reproducible identity for a `Value`.
147    ///
148    fn write_to_hasher(&self, h: &mut Xxh3) {
149        feed_u8(h, self.tag());
150
151        match self {
152            Self::Account(a) => {
153                feed_bytes(h, &a.to_bytes());
154            }
155            Self::Blob(v) => {
156                feed_u8(h, 0x01);
157                feed_bytes(h, v);
158            }
159            Self::Bool(b) => {
160                feed_u8(h, u8::from(*b));
161            }
162            Self::Date(d) => feed_i32(h, d.get()),
163            Self::Decimal(d) => {
164                // encode (sign, scale, mantissa) deterministically:
165                feed_u8(h, u8::from(d.is_sign_negative()));
166                feed_u32(h, d.scale());
167                feed_bytes(h, &d.mantissa().to_be_bytes());
168            }
169            Self::Duration(t) => {
170                feed_u64(h, t.get());
171            }
172            Self::Enum(v) => {
173                feed_u32(h, v.path.len() as u32);
174                feed_bytes(h, v.path.as_bytes());
175
176                feed_u32(h, v.variant.len() as u32);
177                feed_bytes(h, v.variant.as_bytes());
178            }
179            Self::E8s(v) => {
180                feed_u64(h, v.get());
181            }
182            Self::E18s(v) => {
183                feed_bytes(h, &v.to_be_bytes());
184            }
185            Self::Float32(v) => {
186                feed_bytes(h, &v.to_be_bytes());
187            }
188            Self::Float64(v) => {
189                feed_bytes(h, &v.to_be_bytes());
190            }
191            Self::Int(i) => {
192                feed_i64(h, *i);
193            }
194            Self::Int128(i) => {
195                feed_i128(h, i.get());
196            }
197            Self::IntBig(v) => {
198                feed_bytes(h, &v.to_leb128());
199            }
200            Self::List(xs) => {
201                feed_u32(h, xs.len() as u32);
202                for x in xs {
203                    feed_u8(h, 0xFF);
204                    x.write_to_hasher(h); // recurse, no sub-hash
205                }
206            }
207            Self::Principal(p) => {
208                let raw = p.as_slice();
209                feed_u32(h, raw.len() as u32);
210                feed_bytes(h, raw);
211            }
212            Self::Subaccount(s) => {
213                feed_bytes(h, &s.to_bytes());
214            }
215            Self::Text(s) => {
216                // If you need case/Unicode insensitivity, normalize; else skip (much faster)
217                // let norm = normalize_nfkc_casefold(s);
218                // feed_u32( h, norm.len() as u32);
219                // feed_bytes( h, norm.as_bytes());
220                feed_u32(h, s.len() as u32);
221                feed_bytes(h, s.as_bytes());
222            }
223            Self::Timestamp(t) => {
224                feed_u64(h, t.get());
225            }
226            Self::Uint(u) => {
227                feed_u64(h, *u);
228            }
229            Self::Uint128(u) => {
230                feed_u128(h, u.get());
231            }
232            Self::UintBig(v) => {
233                feed_bytes(h, &v.to_leb128());
234            }
235            Self::Ulid(u) => {
236                feed_bytes(h, &u.to_bytes());
237            }
238            Self::None | Self::Unit | Self::Unsupported => {}
239        }
240    }
241
242    #[must_use]
243    pub fn hash_value(&self) -> [u8; 16] {
244        const VERSION: u8 = 1;
245
246        let mut h = Xxh3::with_seed(0);
247        feed_u8(&mut h, VERSION); // version
248
249        self.write_to_hasher(&mut h);
250        h.digest128().to_be_bytes()
251    }
252}