1use core::borrow::Borrow;
2use core::hash::{BuildHasher, Hash, Hasher};
3use core::marker::PhantomData;
4
5use serde::{Deserialize, Serialize};
6
7use crate::common::*;
8use crate::HyperLogLog;
9use crate::HyperLogLogError;
10
11#[derive(Clone, Debug, Serialize, Deserialize)]
45pub struct HyperLogLogPF<H, B>
46where
47 H: Hash + ?Sized,
48 B: BuildHasher,
49{
50 builder: B,
51 count: usize,
52 precision: u8,
53 registers: Registers,
54 phantom: PhantomData<H>,
55}
56
57impl<H, B> HyperLogLogPF<H, B>
58where
59 H: Hash + ?Sized,
60 B: BuildHasher,
61{
62 const MIN_PRECISION: u8 = 4;
64 const MAX_PRECISION: u8 = 16;
66
67 pub fn new(precision: u8, builder: B) -> Result<Self, HyperLogLogError> {
69 if precision < Self::MIN_PRECISION || precision > Self::MAX_PRECISION {
71 return Err(HyperLogLogError::InvalidPrecision);
72 }
73
74 let count = Self::register_count(precision);
76
77 Ok(HyperLogLogPF {
78 builder: builder,
79 count: count,
80 precision: precision,
81 registers: Registers::with_count(count),
82 phantom: PhantomData,
83 })
84 }
85
86 pub fn merge<S, T>(
90 &mut self,
91 other: &HyperLogLogPF<S, T>,
92 ) -> Result<(), HyperLogLogError>
93 where
94 S: Hash + ?Sized,
95 T: BuildHasher,
96 {
97 if self.precision != other.precision() {
98 return Err(HyperLogLogError::IncompatiblePrecision);
99 }
100
101 for (i, val) in other.registers_iter().enumerate() {
102 self.registers.set_greater(i, val);
103 }
104
105 Ok(())
106 }
107
108 pub fn insert_any<R>(&mut self, value: &R)
110 where
111 R: Hash + ?Sized,
112 {
113 self.insert_impl(value);
114 }
115
116 #[inline(always)] fn insert_impl<R>(&mut self, value: &R)
118 where
119 R: Hash + ?Sized,
120 {
121 let mut hasher = self.builder.build_hasher();
123 value.hash(&mut hasher);
125 let mut hash: u32 = hasher.finish() as u32;
127
128 let index: usize = (hash >> (32 - self.precision)) as usize;
130
131 hash = (hash << self.precision) | (1 << (self.precision - 1));
133
134 let zeros: u32 = 1 + hash.leading_zeros();
136
137 self.registers.set_greater(index, zeros);
139 }
140
141 #[inline] fn precision(&self) -> u8 {
143 self.precision
144 }
145
146 #[inline] fn registers_iter(&self) -> impl Iterator<Item = u32> + '_ {
148 self.registers.iter()
149 }
150}
151
152impl<H, B> HyperLogLogCommon for HyperLogLogPF<H, B>
153where
154 H: Hash + ?Sized,
155 B: BuildHasher,
156{
157}
158
159impl<H, B> HyperLogLog<H> for HyperLogLogPF<H, B>
160where
161 H: Hash + ?Sized,
162 B: BuildHasher,
163{
164 fn add(&mut self, value: &H) {
166 self.insert_impl(value);
167 }
168
169 fn insert<Q>(&mut self, value: &Q)
171 where
172 H: Borrow<Q>,
173 Q: Hash + ?Sized,
174 {
175 self.insert_impl(value);
176 }
177
178 fn count(&mut self) -> f64 {
180 let (mut raw, zeros) = (
182 Self::estimate_raw_plus(self.registers.iter(), self.count),
183 self.registers.zeros(),
184 );
185
186 let two32 = (1u64 << 32) as f64;
187
188 if raw <= 2.5 * self.count as f64 && zeros != 0 {
189 raw = Self::linear_count(self.count, zeros);
191 } else if raw > two32 / 30.0 {
192 raw = -1.0 * two32 * ln(1.0 - raw / two32);
194 }
195
196 raw
197 }
198}
199
200#[cfg(test)]
201mod tests {
202 use super::*;
203
204 use core::hash::{BuildHasher, Hasher};
205 use siphasher::sip::SipHasher13;
206
207 struct PassThroughHasher(u64);
208
209 impl Hasher for PassThroughHasher {
210 #[inline]
211 fn finish(&self) -> u64 {
212 self.0
213 }
214
215 #[inline]
216 fn write(&mut self, _: &[u8]) {}
217
218 #[inline]
219 fn write_u64(&mut self, i: u64) {
220 self.0 = i;
221 }
222 }
223
224 #[derive(Serialize, Deserialize)]
225 struct PassThroughHasherBuilder;
226
227 impl BuildHasher for PassThroughHasherBuilder {
228 type Hasher = PassThroughHasher;
229
230 fn build_hasher(&self) -> Self::Hasher {
231 PassThroughHasher(0)
232 }
233 }
234
235 #[derive(Serialize, Deserialize)]
236 struct DefaultBuildHasher;
237
238 impl BuildHasher for DefaultBuildHasher {
239 type Hasher = SipHasher13;
240
241 fn build_hasher(&self) -> Self::Hasher {
242 SipHasher13::new()
243 }
244 }
245
246 #[test]
247 fn test_insert() {
248 let builder = PassThroughHasherBuilder {};
249
250 let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
251 HyperLogLogPF::new(16, builder).unwrap();
252
253 hll.insert(&0x0000000000010fff);
254
255 assert_eq!(hll.registers.get(1), 5);
256
257 hll.insert(&0x000000000002ffff);
258
259 assert_eq!(hll.registers.get(2), 1);
260
261 hll.insert(&0x0000000000030000);
262
263 assert_eq!(hll.registers.get(3), 17);
264
265 hll.insert(&0x0000000000030001);
266
267 assert_eq!(hll.registers.get(3), 17);
268
269 hll.insert(&0x00000000ff037000);
270
271 assert_eq!(hll.registers.get(0xff03), 2);
272
273 hll.insert(&0x00000000ff030800);
274
275 assert_eq!(hll.registers.get(0xff03), 5);
276
277 let builder = PassThroughHasherBuilder {};
278
279 let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
280 HyperLogLogPF::new(4, builder).unwrap();
281
282 hll.insert(&0x000000001fffffff);
283 assert_eq!(hll.registers.get(1), 1);
284
285 hll.insert(&0x00000000ffffffff);
286 assert_eq!(hll.registers.get(0xf), 1);
287
288 hll.insert(&0x0000000000ffffff);
289 assert_eq!(hll.registers.get(0), 5);
290 }
291
292 #[test]
293 fn test_count() {
294 let builder = PassThroughHasherBuilder {};
295
296 let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
297 HyperLogLogPF::new(16, builder).unwrap();
298
299 assert_eq!(hll.count(), 0.0);
300
301 hll.insert(&0x0000000000010fff);
302 hll.insert(&0x0000000000020fff);
303 hll.insert(&0x0000000000030fff);
304 hll.insert(&0x0000000000040fff);
305 hll.insert(&0x0000000000050fff);
306 hll.insert(&0x0000000000050fff);
307
308 assert_eq!(hll.count().trunc() as u64, 5);
309 }
310
311 #[test]
312 fn test_merge() {
313 let builder = PassThroughHasherBuilder {};
314
315 let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
316 HyperLogLogPF::new(16, builder).unwrap();
317
318 hll.insert(&0x00000000000101ff);
319 hll.insert(&0x00000000000202ff);
320 hll.insert(&0x00000000000304ff);
321 hll.insert(&0x0000000000040fff);
322 hll.insert(&0x0000000000050fff);
323 hll.insert(&0x0000000000060fff);
324
325 assert_eq!(hll.registers.get(1), 8);
326 assert_eq!(hll.registers.get(2), 7);
327 assert_eq!(hll.registers.get(3), 6);
328 assert_eq!(hll.registers.get(4), 5);
329 assert_eq!(hll.registers.get(5), 5);
330 assert_eq!(hll.registers.get(6), 5);
331
332 let builder = PassThroughHasherBuilder {};
333
334 let err: HyperLogLogPF<u64, PassThroughHasherBuilder> =
335 HyperLogLogPF::new(9, builder).unwrap();
336
337 assert_eq!(
338 hll.merge(&err),
339 Err(HyperLogLogError::IncompatiblePrecision)
340 );
341
342 let builder = PassThroughHasherBuilder {};
343
344 let mut other: HyperLogLogPF<u64, PassThroughHasherBuilder> =
345 HyperLogLogPF::new(16, builder).unwrap();
346
347 hll.insert(&0x00000000000404ff);
348 hll.insert(&0x00000000000502ff);
349 hll.insert(&0x00000000000601ff);
350
351 assert_eq!(other.merge(&hll), Ok(()));
352
353 assert_eq!(other.count().trunc() as u64, 6);
354
355 assert_eq!(hll.registers.get(1), 8);
356 assert_eq!(hll.registers.get(2), 7);
357 assert_eq!(hll.registers.get(3), 6);
358 assert_eq!(hll.registers.get(4), 6);
359 assert_eq!(hll.registers.get(5), 7);
360 assert_eq!(hll.registers.get(6), 8);
361 }
362
363 #[test]
364 fn test_serialization() {
365 let builder = PassThroughHasherBuilder {};
366
367 let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
368 HyperLogLogPF::new(16, builder).unwrap();
369
370 hll.insert(&0x0000000000010fff);
371 hll.insert(&0x0000000000020fff);
372 hll.insert(&0x0000000000030fff);
373 hll.insert(&0x0000000000040fff);
374 hll.insert(&0x0000000000050fff);
375 hll.insert(&0x0000000000050fff);
376
377 assert_eq!(hll.count().trunc() as usize, 5);
378
379 let serialized = serde_json::to_string(&hll).unwrap();
380
381 let mut deserialized: HyperLogLogPF<u64, PassThroughHasherBuilder> =
382 serde_json::from_str(&serialized).unwrap();
383
384 assert_eq!(deserialized.count().trunc() as usize, 5);
385
386 deserialized.insert(&0x0000000000060fff);
387
388 assert_eq!(deserialized.count().trunc() as usize, 6);
389
390 let mut hll: HyperLogLogPF<u64, DefaultBuildHasher> =
391 HyperLogLogPF::new(16, DefaultBuildHasher {}).unwrap();
392
393 hll.insert(&0x0000000000010fff);
394 hll.insert(&0x0000000000020fff);
395 hll.insert(&0x0000000000030fff);
396 hll.insert(&0x0000000000040fff);
397 hll.insert(&0x0000000000050fff);
398 hll.insert(&0x0000000000050fff);
399
400 assert_eq!(hll.count().trunc() as usize, 5);
401
402 let serialized = serde_json::to_string(&hll).unwrap();
403
404 let mut deserialized: HyperLogLogPF<u64, PassThroughHasherBuilder> =
405 serde_json::from_str(&serialized).unwrap();
406
407 assert_eq!(deserialized.count().trunc() as usize, 5);
408
409 deserialized.insert(&0x0000000000060fff);
410
411 assert_eq!(deserialized.count().trunc() as usize, 6);
412 }
413
414 #[cfg(feature = "bench-units")]
415 mod benches {
416 extern crate test;
417
418 use super::*;
419 use rand::prelude::*;
420 use test::{black_box, Bencher};
421
422 #[bench]
423 fn bench_insert(b: &mut Bencher) {
424 let builder = PassThroughHasherBuilder {};
425
426 let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
427 HyperLogLogPF::new(16, builder).unwrap();
428
429 b.iter(|| {
430 for i in 0u64..1000 {
431 hll.insert(&(u64::max_value() - i));
432 }
433 })
434 }
435
436 #[bench]
437 fn bench_insert_with_hash(b: &mut Bencher) {
438 let mut rng = rand::thread_rng();
439
440 let workload: Vec<String> = (0..2000)
441 .map(|_| {
442 format!("- {} - {} -", rng.gen::<u64>(), rng.gen::<u64>())
443 })
444 .collect();
445
446 b.iter(|| {
447 let mut hll: HyperLogLogPF<&String, DefaultBuildHasher> =
448 HyperLogLogPF::new(16, DefaultBuildHasher {}).unwrap();
449
450 for val in &workload {
451 hll.insert(&val);
452 }
453
454 let val = hll.count();
455
456 black_box(val);
457 })
458 }
459
460 #[bench]
461 fn bench_count(b: &mut Bencher) {
462 let builder = PassThroughHasherBuilder {};
463
464 let mut hll: HyperLogLogPF<u64, PassThroughHasherBuilder> =
465 HyperLogLogPF::new(16, builder).unwrap();
466
467 for i in 0u64..10000 {
468 hll.insert(&(u64::max_value() - i));
469 }
470
471 b.iter(|| {
472 let count = hll.count();
473 black_box(count);
474 })
475 }
476 }
477}