1use core::fmt;
2use std::{
3 any::Any,
4 hash::{Hash, Hasher},
5};
6
7use base64::Engine;
8use fxhash::FxHasher32;
9use siphasher::sip128::{Hasher128, SipHasher13};
10
11#[cfg(feature = "rkyv")]
12use rkyv::{Archive, Deserialize as rDeser, Serialize as rSer};
13
14use crate::error::prelude::ZResult;
15
16pub(crate) type FxBuildHasher = std::hash::BuildHasherDefault<FxHasher>;
17pub use rustc_hash::{FxHashMap, FxHashSet, FxHasher};
18pub type FxDashMap<K, V> = dashmap::DashMap<K, V, FxBuildHasher>;
21
22#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
34#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
35#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
36pub struct Fingerprint {
37 lo: u64,
38 hi: u64,
39}
40
41impl fmt::Debug for Fingerprint {
42 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
43 f.write_str(&self.as_svg_id("fg"))
44 }
45}
46
47impl serde::Serialize for Fingerprint {
48 fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
49 serializer.serialize_str(&self.as_svg_id(""))
50 }
51}
52
53impl<'de> serde::Deserialize<'de> for Fingerprint {
54 fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
55 let s = <std::string::String as serde::Deserialize>::deserialize(deserializer)?;
56 Fingerprint::try_from_str(&s).map_err(serde::de::Error::custom)
57 }
58}
59
60impl Fingerprint {
61 pub fn from_pair(lo: u64, hi: u64) -> Self {
63 Self { lo, hi }
64 }
65
66 pub const fn from_u128(hash: u128) -> Self {
68 Self {
70 lo: hash as u64,
71 hi: (hash >> 64) as u64,
72 }
73 }
74
75 pub fn to_u128(self) -> u128 {
77 ((self.hi as u128) << 64) | self.lo as u128
78 }
79
80 pub fn lower32(self) -> u32 {
84 self.lo as u32
85 }
86
87 pub fn try_from_str(s: &str) -> ZResult<Self> {
89 let bytes = base64::engine::general_purpose::STANDARD_NO_PAD
90 .decode(&s.as_bytes()[..11])
91 .expect("invalid base64 string");
92 let lo = u64::from_le_bytes(bytes.try_into().unwrap());
93 let mut bytes = base64::engine::general_purpose::STANDARD_NO_PAD
94 .decode(&s.as_bytes()[11..])
95 .expect("invalid base64 string");
96 bytes.resize(8, 0);
97 let hi = u64::from_le_bytes(bytes.try_into().unwrap());
98 Ok(Self::from_pair(lo, hi))
99 }
100
101 #[comemo::memoize]
105 pub fn as_svg_id(self, prefix: &'static str) -> String {
106 let fingerprint_lo =
107 base64::engine::general_purpose::STANDARD_NO_PAD.encode(self.lo.to_le_bytes());
108 if self.hi == 0 {
109 return [prefix, &fingerprint_lo].join("");
110 }
111
112 let fingerprint_hi = {
114 let id = self.hi.to_le_bytes();
115 let rev_zero = id.iter().rev().skip_while(|&&b| b == 0).count();
117 let id = &id[..rev_zero];
118 base64::engine::general_purpose::STANDARD_NO_PAD.encode(id)
119 };
120 [prefix, &fingerprint_lo, &fingerprint_hi].join("")
121 }
122}
123
124pub trait FingerprintHasher: std::hash::Hasher {
126 fn finish_fingerprint(self) -> (Fingerprint, Vec<u8>);
129}
130
131#[derive(Default)]
133pub struct FingerprintSipHasher {
134 data: Vec<u8>,
136}
137
138pub type FingerprintSipHasherBase = SipHasher13;
139
140impl FingerprintSipHasher {
141 pub fn fast_hash(&self) -> (u32, &Vec<u8>) {
142 let mut inner = FxHasher32::default();
143 self.data.hash(&mut inner);
144 (inner.finish() as u32, &self.data)
145 }
146}
147
148impl std::hash::Hasher for FingerprintSipHasher {
149 fn write(&mut self, bytes: &[u8]) {
150 self.data.extend_from_slice(bytes);
151 }
152
153 fn finish(&self) -> u64 {
154 let mut inner = FingerprintSipHasherBase::default();
155 self.data.hash(&mut inner);
156 inner.finish()
157 }
158}
159
160impl FingerprintHasher for FingerprintSipHasher {
161 fn finish_fingerprint(self) -> (Fingerprint, Vec<u8>) {
162 let buffer = self.data.clone();
163 let mut inner = FingerprintSipHasherBase::default();
164 buffer.hash(&mut inner);
165 let hash = inner.finish128();
166 (
167 Fingerprint {
168 lo: hash.h1,
169 hi: hash.h2,
170 },
171 buffer,
172 )
173 }
174}
175
176#[derive(Default)]
180pub struct FingerprintBuilder {
181 #[cfg(feature = "bi-hash")]
183 fast_conflict_checker: crate::adt::CHashMap<u32, Vec<u8>>,
184 conflict_checker: crate::adt::CHashMap<Fingerprint, Vec<u8>>,
186}
187
188#[cfg(not(feature = "bi-hash"))]
189impl FingerprintBuilder {
190 pub fn resolve_unchecked<T: Hash>(&self, item: &T) -> Fingerprint {
191 let mut s = FingerprintSipHasher { data: Vec::new() };
192 item.hash(&mut s);
193 let (fingerprint, _featured_data) = s.finish_fingerprint();
194 fingerprint
195 }
196
197 pub fn resolve<T: Hash + 'static>(&self, item: &T) -> Fingerprint {
198 let mut s = FingerprintSipHasher { data: Vec::new() };
199 item.type_id().hash(&mut s);
200 item.hash(&mut s);
201
202 let (fingerprint, featured_data) = s.finish_fingerprint();
203 let Some(prev_featured_data) = self.conflict_checker.get(&fingerprint) else {
204 self.conflict_checker.insert(fingerprint, featured_data);
205 return fingerprint;
206 };
207
208 if *prev_featured_data == *featured_data {
209 return fingerprint;
210 }
211
212 panic!("Fingerprint conflict detected!");
214 }
215}
216
217#[cfg(feature = "bi-hash")]
218impl FingerprintBuilder {
219 pub fn resolve_unchecked<T: Hash>(&self, item: &T) -> Fingerprint {
220 let mut s = FingerprintSipHasher { data: Vec::new() };
221 item.hash(&mut s);
222 let (fingerprint, featured_data) = s.fast_hash();
223 let Some(prev_featured_data) = self.fast_conflict_checker.get(&fingerprint) else {
224 self.fast_conflict_checker.insert(fingerprint, s.data);
225 return Fingerprint::from_pair(fingerprint as u64, 0);
226 };
227
228 if *prev_featured_data == *featured_data {
229 return Fingerprint::from_pair(fingerprint as u64, 0);
230 }
231
232 let (fingerprint, _featured_data) = s.finish_fingerprint();
233 fingerprint
234 }
235
236 pub fn resolve<T: Hash + 'static>(&self, item: &T) -> Fingerprint {
237 let mut s = FingerprintSipHasher { data: Vec::new() };
238 item.type_id().hash(&mut s);
239 item.hash(&mut s);
240 let (fingerprint, featured_data) = s.fast_hash();
241 let Some(prev_featured_data) = self.fast_conflict_checker.get(&fingerprint) else {
242 self.fast_conflict_checker.insert(fingerprint, s.data);
243 return Fingerprint::from_pair(fingerprint as u64, 0);
244 };
245
246 if *prev_featured_data == *featured_data {
247 return Fingerprint::from_pair(fingerprint as u64, 0);
248 }
249
250 let (fingerprint, featured_data) = s.finish_fingerprint();
251 let Some(prev_featured_data) = self.conflict_checker.get(&fingerprint) else {
252 self.conflict_checker.insert(fingerprint, featured_data);
253 return fingerprint;
254 };
255
256 if *prev_featured_data == *featured_data {
257 return fingerprint;
258 }
259
260 panic!("Fingerprint conflict detected!");
262 }
263}
264
265pub fn item_hash128<T: Hash + 'static>(item: &T) -> u128 {
271 let mut state = SipHasher13::new();
274 item.type_id().hash(&mut state);
275 item.hash(&mut state);
276 state.finish128().as_u128()
277}
278
279#[inline]
283pub fn hash128<T: std::hash::Hash>(value: &T) -> u128 {
284 let mut state = SipHasher13::new();
285 value.hash(&mut state);
286 state.finish128().as_u128()
287}
288
289#[inline]
291pub fn hash64<T: Hash + ?Sized>(v: &T) -> u64 {
292 let mut state = FxHasher::default();
293 v.hash(&mut state);
294 state.finish()
295}
296
297pub use fxhash::hash32;
299
300#[test]
301fn test_fingerprint() {
302 let t = Fingerprint::from_pair(0, 1);
303 assert_eq!(Fingerprint::try_from_str(&t.as_svg_id("")).unwrap(), t);
304
305 let t = Fingerprint::from_pair(1, 1);
306 assert_eq!(Fingerprint::try_from_str(&t.as_svg_id("")).unwrap(), t);
307
308 let t = Fingerprint::from_pair(1, 0);
309 assert_eq!(Fingerprint::try_from_str(&t.as_svg_id("")).unwrap(), t);
310
311 let t = Fingerprint::from_pair(0, 0);
312 assert_eq!(Fingerprint::try_from_str(&t.as_svg_id("")).unwrap(), t);
313}