1pub(super) mod sketch;
2#[cfg(test)]
3mod tests;
4
5use std::hash::BuildHasher;
6
7pub use sketch::{Sketch, M1024, M128, M2048, M256, M4096, M512, M64, M8192};
8
9use crate::AHasherDefaultBuilder;
10
11#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
16#[derive(Debug, Eq, PartialEq, Hash, Clone)]
17pub struct HyperTwoBits<SKETCH: Sketch = M256, HASH: BuildHasher = AHasherDefaultBuilder> {
18 hash: HASH,
19 sketch: SKETCH,
20 count: u32,
21 t: u32,
22}
23
24impl<SKETCH: Sketch, H: Default + BuildHasher> Default for HyperTwoBits<SKETCH, H> {
25 fn default() -> Self {
26 Self {
27 hash: H::default(),
28 sketch: SKETCH::default(),
29 count: 0,
30 t: 1,
31 }
32 }
33}
34
35impl<HASH: BuildHasher + Default, BITS: Sketch> HyperTwoBits<BITS, HASH> {
36 const ALPHA: f64 = 0.988;
37 #[must_use]
38 pub fn new() -> Self {
41 Self {
42 hash: HASH::default(),
43 sketch: BITS::default(),
44 count: 0,
45 t: 1,
46 }
47 }
48
49 pub fn merge(&mut self, mut other: Self) {
53 assert_eq!(
54 self.hash.hash_one(42),
55 other.hash.hash_one(42),
56 "Hashers must be the same, can not merge"
57 );
58 #[allow(
61 clippy::cast_lossless,
62 clippy::cast_sign_loss,
63 clippy::cast_possible_truncation
64 )]
65 let threshold = const { (0.99 * (BITS::STREAMS as f64)) as u32 };
66 if other.t > self.t {
68 std::mem::swap(self, &mut other);
69 }
70
71 if self.t - other.t > 8 {
73 return;
74 }
75 let same = self.t == other.t;
78 if self.count >= threshold {
80 self.count = self.sketch.decrement();
81 self.t += 4;
82 }
83
84 if same {
85 self.sketch.merge(&other.sketch);
87 } else {
88 self.sketch.merge_high_into_lo(&other.sketch);
90 }
91 self.count = self.sketch.count();
93 }
94
95 #[inline]
96 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
97 pub fn insert<V: std::hash::Hash + ?Sized>(&mut self, value: &V) {
99 let hash = self.hash.hash_one(value);
100 self.insert_hash(hash);
101 }
102
103 #[inline]
104 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
105 pub fn insert_hash(&mut self, hash: u64) {
107 let threshold: u32 = const { (Self::ALPHA * BITS::STREAMS as f64) as u32 };
108 let stream: u32 = (hash >> BITS::IDX_SHIFT) as u32;
110 let hash: u64 = hash & BITS::HASH_MASK;
111
112 if hash.trailing_ones() >= self.t && self.sketch.val(stream) < 1 {
113 self.count += 1;
114 self.sketch.set(stream, 1);
115 }
116 if hash.trailing_ones() >= self.t + 4 && self.sketch.val(stream) < 2 {
118 self.sketch.set(stream, 2);
119 }
120
121 if hash.trailing_ones() >= self.t + 8 && self.sketch.val(stream) < 3 {
123 self.sketch.set(stream, 3);
124 }
125
126 if self.count >= threshold {
127 self.count = self.sketch.decrement();
128 self.t += 4;
129 }
130 }
131
132 #[inline]
133 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
134 pub fn insert2<V: std::hash::Hash>(&mut self, v1: &V, v2: &V) {
137 let threshold: u32 = const { (Self::ALPHA * BITS::STREAMS as f64) as u32 };
138
139 let hash = self.hash.hash_one(v1);
140 let stream: u32 = (hash >> BITS::IDX_SHIFT) as u32;
142 let hash: u64 = hash & BITS::HASH_MASK;
143
144 if hash.trailing_ones() >= self.t && self.sketch.val(stream) < 1 {
145 self.count += 1;
146 self.sketch.set(stream, 1);
147 }
148 if hash.trailing_ones() >= self.t + 4 && self.sketch.val(stream) < 2 {
150 self.sketch.set(stream, 2);
151 }
152
153 if hash.trailing_ones() >= self.t + 8 && self.sketch.val(stream) < 3 {
155 self.sketch.set(stream, 3);
156 }
157
158 let hash = self.hash.hash_one(v2);
159 let stream: u32 = (hash >> BITS::IDX_SHIFT) as u32;
161 let hash: u64 = hash & BITS::HASH_MASK;
162
163 if hash.trailing_ones() >= self.t && self.sketch.val(stream) < 1 {
164 self.count += 1;
165 self.sketch.set(stream, 1);
166 }
167 if hash.trailing_ones() >= self.t + 4 && self.sketch.val(stream) < 2 {
169 self.sketch.set(stream, 2);
170 }
171
172 if hash.trailing_ones() >= self.t + 8 && self.sketch.val(stream) < 3 {
174 self.sketch.set(stream, 3);
175 }
176
177 if self.count >= threshold {
178 self.count = self.sketch.decrement();
179 self.t += 4;
180 }
181 }
182
183 #[inline]
184 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
185 pub fn insert4<V: std::hash::Hash>(&mut self, v1: &V, v2: &V, v3: &V, v4: &V) {
188 let threshold: u32 = const { (Self::ALPHA * BITS::STREAMS as f64) as u32 };
189
190 let hash = self.hash.hash_one(v1);
191 let stream: u32 = (hash >> BITS::IDX_SHIFT) as u32;
193 let hash: u64 = hash & BITS::HASH_MASK;
194
195 if hash.trailing_ones() >= self.t && self.sketch.val(stream) < 1 {
196 self.count += 1;
197 self.sketch.set(stream, 1);
198 }
199 if hash.trailing_ones() >= self.t + 4 && self.sketch.val(stream) < 2 {
201 self.sketch.set(stream, 2);
202 }
203
204 if hash.trailing_ones() >= self.t + 8 && self.sketch.val(stream) < 3 {
206 self.sketch.set(stream, 3);
207 }
208
209 let hash = self.hash.hash_one(v2);
210 let stream: u32 = (hash >> BITS::IDX_SHIFT) as u32;
212 let hash: u64 = hash & BITS::HASH_MASK;
213
214 if hash.trailing_ones() >= self.t && self.sketch.val(stream) < 1 {
215 self.count += 1;
216 self.sketch.set(stream, 1);
217 }
218 if hash.trailing_ones() >= self.t + 4 && self.sketch.val(stream) < 2 {
220 self.sketch.set(stream, 2);
221 }
222
223 if hash.trailing_ones() >= self.t + 8 && self.sketch.val(stream) < 3 {
225 self.sketch.set(stream, 3);
226 }
227
228 let hash = self.hash.hash_one(v3);
229 let stream: u32 = (hash >> BITS::IDX_SHIFT) as u32;
231 let hash: u64 = hash & BITS::HASH_MASK;
232
233 if hash.trailing_ones() >= self.t && self.sketch.val(stream) < 1 {
234 self.count += 1;
235 self.sketch.set(stream, 1);
236 }
237 if hash.trailing_ones() >= self.t + 4 && self.sketch.val(stream) < 2 {
239 self.sketch.set(stream, 2);
240 }
241
242 if hash.trailing_ones() >= self.t + 8 && self.sketch.val(stream) < 3 {
244 self.sketch.set(stream, 3);
245 }
246
247 let hash = self.hash.hash_one(v4);
248 let stream: u32 = (hash >> BITS::IDX_SHIFT) as u32;
250 let hash: u64 = hash & BITS::HASH_MASK;
251
252 if hash.trailing_ones() >= self.t && self.sketch.val(stream) < 1 {
253 self.count += 1;
254 self.sketch.set(stream, 1);
255 }
256 if hash.trailing_ones() >= self.t + 4 && self.sketch.val(stream) < 2 {
258 self.sketch.set(stream, 2);
259 }
260
261 if hash.trailing_ones() >= self.t + 8 && self.sketch.val(stream) < 3 {
263 self.sketch.set(stream, 3);
264 }
265
266 if self.count >= threshold {
267 self.count = self.sketch.decrement();
268 self.t += 4;
269 }
270 }
271
272 #[inline]
275 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
276 pub fn count(&self) -> u64 {
277 let beta = 1.0 - f64::from(self.count) / f64::from(BITS::STREAMS);
278 let bias: f64 = (1.0 / beta).ln();
279 (f64::from(self.t).exp2() * f64::from(BITS::STREAMS) * bias) as u64
280 }
281}