icydb_core/value/
bytes.rs

1use crate::value::Value;
2use canic::utils::hash::Xxh3;
3
4///
5/// Canonical Byte Representation
6///
7
8#[inline]
9fn feed_i32(h: &mut Xxh3, x: i32) {
10    h.update(&x.to_be_bytes());
11}
12#[inline]
13fn feed_i64(h: &mut Xxh3, x: i64) {
14    h.update(&x.to_be_bytes());
15}
16#[inline]
17fn feed_i128(h: &mut Xxh3, x: i128) {
18    h.update(&x.to_be_bytes());
19}
20#[inline]
21fn feed_u8(h: &mut Xxh3, x: u8) {
22    h.update(&[x]);
23}
24#[inline]
25fn feed_u32(h: &mut Xxh3, x: u32) {
26    h.update(&x.to_be_bytes());
27}
28#[inline]
29fn feed_u64(h: &mut Xxh3, x: u64) {
30    h.update(&x.to_be_bytes());
31}
32#[inline]
33fn feed_u128(h: &mut Xxh3, x: u128) {
34    h.update(&x.to_be_bytes());
35}
36
37#[inline]
38fn feed_bytes(h: &mut Xxh3, b: &[u8]) {
39    h.update(b);
40}
41
42#[allow(clippy::cast_possible_truncation)]
43impl Value {
44    ///
45    /// Compute a **canonical, deterministic 128-bit fingerprint** of this `Value`.
46    ///
47    /// This is *not* the same as serializing the value (e.g. with CBOR or Serde) and hashing:
48    /// - CBOR is not canonical by default (ints can have multiple encodings, maps can reorder keys, NaN payloads differ, etc.).
49    /// - Rust's internal layout is not stable across versions or platforms.
50    ///
51    /// Instead, we define our own **canonical byte representation**:
52    /// - Prefix with a fixed `VERSION` byte to allow evolution of the format.
53    /// - Prefix with a `ValueTag` to distinguish enum variants (`Int(1)` vs `Uint(1)`).
54    /// - Encode each variant deterministically (e.g. Decimal as sign/scale/mantissa).
55    /// - Recurse through lists element-by-element in order.
56    ///
57    /// ### Why?
58    /// - **Stable across upgrades / canisters**: the same logical value always yields the same hash.
59    /// - **Indexing**: provides a fixed-size `[u8; 16]` fingerprint for use in secondary indexes
60    ///   and fast equality lookups.
61    /// - **Canonicalization**: ensures semantically equal values hash identically, avoiding
62    ///   “same value, different bytes” bugs.
63    ///
64    /// Use this in query planning, index scans, and anywhere you need a compact,
65    /// reproducible identity for a `Value`.
66    ///
67    fn write_to_hasher(&self, h: &mut Xxh3) {
68        feed_u8(h, self.tag());
69
70        match self {
71            Self::Account(a) => {
72                feed_bytes(h, &a.to_bytes());
73            }
74            Self::Blob(v) => {
75                feed_u8(h, 0x01);
76                feed_bytes(h, v);
77            }
78            Self::Bool(b) => {
79                feed_u8(h, u8::from(*b));
80            }
81            Self::Date(d) => feed_i32(h, d.get()),
82            Self::Decimal(d) => {
83                // encode (sign, scale, mantissa) deterministically:
84                feed_u8(h, u8::from(d.is_sign_negative()));
85                feed_u32(h, d.scale());
86                feed_bytes(h, &d.mantissa().to_be_bytes());
87            }
88            Self::Duration(t) => {
89                feed_u64(h, t.get());
90            }
91            Self::Enum(v) => {
92                feed_u32(h, v.path.len() as u32);
93                feed_bytes(h, v.path.as_bytes());
94
95                feed_u32(h, v.variant.len() as u32);
96                feed_bytes(h, v.variant.as_bytes());
97            }
98            Self::E8s(v) => {
99                feed_u64(h, v.get());
100            }
101            Self::E18s(v) => {
102                feed_bytes(h, &v.to_be_bytes());
103            }
104            Self::Float32(v) => {
105                feed_bytes(h, &v.to_be_bytes());
106            }
107            Self::Float64(v) => {
108                feed_bytes(h, &v.to_be_bytes());
109            }
110            Self::Int(i) => {
111                feed_i64(h, *i);
112            }
113            Self::Int128(i) => {
114                feed_i128(h, i.get());
115            }
116            Self::IntBig(v) => {
117                feed_bytes(h, &v.to_leb128());
118            }
119            Self::List(xs) => {
120                feed_u32(h, xs.len() as u32);
121                for x in xs {
122                    feed_u8(h, 0xFF);
123                    x.write_to_hasher(h); // recurse, no sub-hash
124                }
125            }
126            Self::Principal(p) => {
127                let raw = p.as_slice();
128                feed_u32(h, raw.len() as u32);
129                feed_bytes(h, raw);
130            }
131            Self::Subaccount(s) => {
132                feed_bytes(h, &s.to_bytes());
133            }
134            Self::Text(s) => {
135                // If you need case/Unicode insensitivity, normalize; else skip (much faster)
136                // let norm = normalize_nfkc_casefold(s);
137                // feed_u32( h, norm.len() as u32);
138                // feed_bytes( h, norm.as_bytes());
139                feed_u32(h, s.len() as u32);
140                feed_bytes(h, s.as_bytes());
141            }
142            Self::Timestamp(t) => {
143                feed_u64(h, t.get());
144            }
145            Self::Uint(u) => {
146                feed_u64(h, *u);
147            }
148            Self::Uint128(u) => {
149                feed_u128(h, u.get());
150            }
151            Self::UintBig(v) => {
152                feed_bytes(h, &v.to_leb128());
153            }
154            Self::Ulid(u) => {
155                feed_bytes(h, &u.to_bytes());
156            }
157            Self::None | Self::Unit | Self::Unsupported => {}
158        }
159    }
160
161    #[must_use]
162    pub fn hash_value(&self) -> [u8; 16] {
163        const VERSION: u8 = 1;
164
165        let mut h = Xxh3::with_seed(0);
166        feed_u8(&mut h, VERSION); // version
167
168        self.write_to_hasher(&mut h);
169        h.digest128().to_be_bytes()
170    }
171}