1#[allow(unused_imports)]
8use std::collections::hash_map::{DefaultHasher, RandomState};
9use std::{
10 collections::BTreeMap,
11 convert::TryInto,
12 ffi, fs,
13 hash::{BuildHasher, Hash, Hasher},
14 io::{self, ErrorKind, Read, Write},
15};
16
17use crate::{BuildHasherDefault, Result};
18
19fn murmur64(mut h: u64) -> u64 {
20 h ^= h >> 33;
21 h = h.wrapping_mul(0xff51_afd7_ed55_8ccd);
22 h ^= h >> 33;
23 h = h.wrapping_mul(0xc4ce_b9fe_1a85_ec53);
24 h ^= h >> 33;
25 h
26}
27
28fn splitmix64(seed: &mut u64) -> u64 {
30 *seed = (*seed).wrapping_add(0x9E37_79B9_7F4A_7C15);
31 let mut z = *seed;
32 z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
33 z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
34 z ^ (z >> 31)
35}
36
37fn mixsplit(key: u64, seed: u64) -> u64 {
38 murmur64(key.wrapping_add(seed))
39}
40
41fn reduce(hash: u32, n: u32) -> u32 {
42 (((hash as u64) * (n as u64)) >> 32) as u32
44}
45
46fn fingerprint(hash: u64) -> u64 {
47 hash ^ (hash >> 32)
48}
49
50#[derive(Clone, Default)]
51struct XorSet {
52 xor_mask: u64,
53 count: u32,
54}
55
56#[derive(Default)]
57struct Hashes {
58 h: u64,
59 h0: u32,
60 h1: u32,
61 h2: u32,
62}
63
64#[derive(Clone, Copy, Default)]
65struct KeyIndex {
66 hash: u64,
67 index: u32,
68}
69
70pub struct Xor8<H = BuildHasherDefault>
91where
92 H: BuildHasher,
93{
94 keys: Option<BTreeMap<u64, ()>>,
95 pub hash_builder: H,
96 pub seed: u64,
97 pub block_length: u32,
98 pub finger_prints: Vec<u8>,
99}
100
101impl<H> PartialEq for Xor8<H>
102where
103 H: BuildHasher,
104{
105 fn eq(&self, other: &Self) -> bool {
106 self.seed == other.seed
107 && self.block_length == other.block_length
108 && self.finger_prints == other.finger_prints
109 }
110}
111
112impl<H> Default for Xor8<H>
113where
114 H: BuildHasher + Default,
115{
116 fn default() -> Self {
117 Xor8 {
118 keys: Some(BTreeMap::new()),
119 hash_builder: H::default(),
120 seed: u64::default(),
121 block_length: u32::default(),
122 finger_prints: Vec::default(),
123 }
124 }
125}
126
127impl<H> Xor8<H>
128where
129 H: BuildHasher,
130{
131 pub fn new() -> Self
133 where
134 H: Default,
135 {
136 Self::default()
137 }
138
139 pub fn with_hasher(hash_builder: H) -> Self {
141 Xor8 {
142 keys: Some(BTreeMap::new()),
143 hash_builder,
144 seed: u64::default(),
145 block_length: u32::default(),
146 finger_prints: Vec::default(),
147 }
148 }
149}
150
151impl<H> Xor8<H>
152where
153 H: BuildHasher,
154{
155 pub fn insert<K: ?Sized + Hash>(&mut self, key: &K) {
158 let hashed_key = {
159 let mut hasher = self.hash_builder.build_hasher();
160 key.hash(&mut hasher);
161 hasher.finish()
162 };
163 self.keys.as_mut().unwrap().insert(hashed_key, ());
164 }
165
166 pub fn populate<K: Hash>(&mut self, keys: &[K]) {
170 keys.iter().for_each(|key| {
171 let mut hasher = self.hash_builder.build_hasher();
172 key.hash(&mut hasher);
173 self.keys.as_mut().unwrap().insert(hasher.finish(), ());
174 })
175 }
176
177 pub fn populate_keys(&mut self, digests: &[u64]) {
179 for digest in digests.iter() {
180 self.keys.as_mut().unwrap().insert(*digest, ());
181 }
182 }
183
184 pub fn build(&mut self) -> Result<()> {
187 match self.keys.take() {
188 Some(keys) => {
189 let digests = keys.iter().map(|(k, _)| *k).collect::<Vec<u64>>();
190 self.build_keys(&digests)
191 }
192 None => Ok(()),
193 }
194 }
195
196 pub fn build_keys(&mut self, digests: &[u64]) -> Result<()> {
203 let (size, mut rngcounter) = (digests.len(), 1_u64);
204 let capacity = {
205 let capacity = 32 + ((1.23 * (size as f64)).ceil() as u32);
206 capacity / 3 * 3 };
208 self.seed = splitmix64(&mut rngcounter);
209 self.block_length = capacity / 3;
210 self.finger_prints = vec![u8::default(); capacity as usize];
211
212 let block_length = self.block_length as usize;
213 let mut q0: Vec<KeyIndex> = Vec::with_capacity(block_length);
214 let mut q1: Vec<KeyIndex> = Vec::with_capacity(block_length);
215 let mut q2: Vec<KeyIndex> = Vec::with_capacity(block_length);
216 let mut stack: Vec<KeyIndex> = Vec::with_capacity(size);
217 let mut sets0: Vec<XorSet> = vec![XorSet::default(); block_length];
218 let mut sets1: Vec<XorSet> = vec![XorSet::default(); block_length];
219 let mut sets2: Vec<XorSet> = vec![XorSet::default(); block_length];
220
221 loop {
222 for key in digests.iter() {
223 let hs = self.geth0h1h2(*key);
224 sets0[hs.h0 as usize].xor_mask ^= hs.h;
225 sets0[hs.h0 as usize].count += 1;
226 sets1[hs.h1 as usize].xor_mask ^= hs.h;
227 sets1[hs.h1 as usize].count += 1;
228 sets2[hs.h2 as usize].xor_mask ^= hs.h;
229 sets2[hs.h2 as usize].count += 1;
230 }
231
232 q0.clear();
233 q1.clear();
234 q2.clear();
235
236 let iter = sets0.iter().enumerate().take(self.block_length as usize);
237 for (i, item) in iter {
238 if item.count == 1 {
239 q0.push(KeyIndex {
240 index: i as u32,
241 hash: item.xor_mask,
242 });
243 }
244 }
245 let iter = sets1.iter().enumerate().take(self.block_length as usize);
246 for (i, item) in iter {
247 if item.count == 1 {
248 q1.push(KeyIndex {
249 index: i as u32,
250 hash: item.xor_mask,
251 });
252 }
253 }
254 let iter = sets2.iter().enumerate().take(self.block_length as usize);
255 for (i, item) in iter {
256 if item.count == 1 {
257 q2.push(KeyIndex {
258 index: i as u32,
259 hash: item.xor_mask,
260 });
261 }
262 }
263
264 stack.clear();
265
266 while !q0.is_empty() || !q1.is_empty() || !q2.is_empty() {
267 while let Some(keyindexvar) = q0.pop() {
268 if sets0[keyindexvar.index as usize].count == 0 {
269 continue;
271 }
272 let hash = keyindexvar.hash;
273 let h1 = self.geth1(hash);
274 let h2 = self.geth2(hash);
275 stack.push(keyindexvar);
276
277 let mut s = unsafe { sets1.get_unchecked_mut(h1 as usize) };
278 s.xor_mask ^= hash;
279 s.count -= 1;
280 if s.count == 1 {
281 q1.push(KeyIndex {
282 index: h1,
283 hash: s.xor_mask,
284 })
285 }
286
287 let mut s = unsafe { sets2.get_unchecked_mut(h2 as usize) };
288 s.xor_mask ^= hash;
289 s.count -= 1;
290 if s.count == 1 {
291 q2.push(KeyIndex {
292 index: h2,
293 hash: s.xor_mask,
294 })
295 }
296 }
297 while let Some(mut keyindexvar) = q1.pop() {
298 if sets1[keyindexvar.index as usize].count == 0 {
299 continue;
300 }
301 let hash = keyindexvar.hash;
302 let h0 = self.geth0(hash);
303 let h2 = self.geth2(hash);
304 keyindexvar.index += self.block_length;
305 stack.push(keyindexvar);
306
307 let mut s = unsafe { sets0.get_unchecked_mut(h0 as usize) };
308 s.xor_mask ^= hash;
309 s.count -= 1;
310 if s.count == 1 {
311 q0.push(KeyIndex {
312 index: h0,
313 hash: s.xor_mask,
314 })
315 }
316
317 let mut s = unsafe { sets2.get_unchecked_mut(h2 as usize) };
318 s.xor_mask ^= hash;
319 s.count -= 1;
320 if s.count == 1 {
321 q2.push(KeyIndex {
322 index: h2,
323 hash: s.xor_mask,
324 })
325 }
326 }
327 while let Some(mut keyindexvar) = q2.pop() {
328 if sets2[keyindexvar.index as usize].count == 0 {
329 continue;
330 }
331 let hash = keyindexvar.hash;
332 let h0 = self.geth0(hash);
333 let h1 = self.geth1(hash);
334 keyindexvar.index += 2 * self.block_length;
335 stack.push(keyindexvar);
336
337 let mut s = unsafe { sets0.get_unchecked_mut(h0 as usize) };
338 s.xor_mask ^= hash;
339 s.count -= 1;
340 if s.count == 1 {
341 q0.push(KeyIndex {
342 index: h0,
343 hash: s.xor_mask,
344 })
345 }
346 let mut s = unsafe { sets1.get_unchecked_mut(h1 as usize) };
347 s.xor_mask ^= hash;
348 s.count -= 1;
349 if s.count == 1 {
350 q1.push(KeyIndex {
351 index: h1,
352 hash: s.xor_mask,
353 })
354 }
355 }
356 }
357
358 if stack.len() == size {
359 break;
360 }
361
362 for item in sets0.iter_mut() {
363 *item = XorSet::default();
364 }
365 for item in sets1.iter_mut() {
366 *item = XorSet::default();
367 }
368 for item in sets2.iter_mut() {
369 *item = XorSet::default();
370 }
371 self.seed = splitmix64(&mut rngcounter)
372 }
373
374 while let Some(ki) = stack.pop() {
375 let mut val = fingerprint(ki.hash) as u8;
376 if ki.index < self.block_length {
377 let h1 = (self.geth1(ki.hash) + self.block_length) as usize;
378 let h2 = (self.geth2(ki.hash) + 2 * self.block_length) as usize;
379 val ^= self.finger_prints[h1] ^ self.finger_prints[h2];
380 } else if ki.index < 2 * self.block_length {
381 let h0 = self.geth0(ki.hash) as usize;
382 let h2 = (self.geth2(ki.hash) + 2 * self.block_length) as usize;
383 val ^= self.finger_prints[h0] ^ self.finger_prints[h2];
384 } else {
385 let h0 = self.geth0(ki.hash) as usize;
386 let h1 = (self.geth1(ki.hash) + self.block_length) as usize;
387 val ^= self.finger_prints[h0] ^ self.finger_prints[h1]
388 }
389 self.finger_prints[ki.index as usize] = val;
390 }
391
392 Ok(())
393 }
394}
395
396impl<H> Xor8<H>
397where
398 H: BuildHasher,
399{
400 pub fn contains<K: ?Sized + Hash>(&self, key: &K) -> bool {
403 let hashed_key = {
404 let mut hasher = self.hash_builder.build_hasher();
405 key.hash(&mut hasher);
406 hasher.finish()
407 };
408 self.contains_key(hashed_key)
409 }
410
411 pub fn contains_key(&self, digest: u64) -> bool {
414 let hash = mixsplit(digest, self.seed);
415 let f = fingerprint(hash) as u8;
416 let r0 = hash as u32;
417 let r1 = hash.rotate_left(21) as u32;
418 let r2 = hash.rotate_left(42) as u32;
419 let h0 = reduce(r0, self.block_length) as usize;
420 let h1 = (reduce(r1, self.block_length) + self.block_length) as usize;
421 let h2 = (reduce(r2, self.block_length) + 2 * self.block_length) as usize;
422 f == (self.finger_prints[h0] ^ self.finger_prints[h1] ^ self.finger_prints[h2])
423 }
424
425 #[allow(dead_code)]
426 fn get_hasher(&self) -> H::Hasher {
427 self.hash_builder.build_hasher()
428 }
429}
430
431impl<H> Xor8<H>
432where
433 H: BuildHasher,
434{
435 fn geth0h1h2(&self, k: u64) -> Hashes {
436 let h = mixsplit(k, self.seed);
437 Hashes {
438 h,
439 h0: reduce(h as u32, self.block_length),
440 h1: reduce(h.rotate_left(21) as u32, self.block_length),
441 h2: reduce(h.rotate_left(42) as u32, self.block_length),
442 }
443 }
444
445 fn geth0(&self, hash: u64) -> u32 {
446 let r0 = hash as u32;
447 reduce(r0, self.block_length)
448 }
449
450 fn geth1(&self, hash: u64) -> u32 {
451 let r1 = hash.rotate_left(21) as u32;
452 reduce(r1, self.block_length)
453 }
454
455 fn geth2(&self, hash: u64) -> u32 {
456 let r2 = hash.rotate_left(42) as u32;
457 reduce(r2, self.block_length)
458 }
459}
460
461impl<H> Xor8<H>
467where
468 H: Into<Vec<u8>> + From<Vec<u8>> + BuildHasher,
469{
470 const SIGNATURE_V1: [u8; 4] = [b'^', b'T', b'L', 1];
476 const SIGNATURE_V2: [u8; 4] = [b'^', b'T', b'L', 2];
477
478 const METADATA_LENGTH: usize = 4 + 8 + 4 + 4 + 4;
483
484 pub fn write_file(&self, path: &ffi::OsStr) -> io::Result<usize>
487 where
488 H: Clone,
489 {
490 let mut f = fs::File::create(path)?;
491 let buf = self.to_bytes();
492 f.write_all(&buf)?;
493 Ok(buf.len())
494 }
495
496 pub fn read_file(path: &ffi::OsStr) -> io::Result<Self>
498 where
499 H: Default,
500 {
501 let mut f = fs::File::open(path)?;
502 let mut data = Vec::new();
503 f.read_to_end(&mut data)?;
504 Self::from_bytes(data)
505 }
506
507 pub fn to_bytes(&self) -> Vec<u8>
508 where
509 H: Clone,
510 {
511 let capacity = Self::METADATA_LENGTH + self.finger_prints.len();
512 let mut buf: Vec<u8> = Vec::with_capacity(capacity);
513 buf.extend_from_slice(&Xor8::<H>::SIGNATURE_V2);
514 buf.extend_from_slice(&self.seed.to_be_bytes());
515 buf.extend_from_slice(&self.block_length.to_be_bytes());
516 buf.extend_from_slice(&(self.finger_prints.len() as u32).to_be_bytes());
517
518 let hb_binary: Vec<u8> = self.hash_builder.clone().into();
519 buf.extend_from_slice(&(hb_binary.len() as u32).to_be_bytes());
520
521 buf.extend_from_slice(&self.finger_prints);
522 buf.extend_from_slice(&hb_binary);
523 buf
524 }
525
526 pub fn from_bytes(buf: Vec<u8>) -> io::Result<Self>
527 where
528 H: Default,
529 {
530 use std::io::Error;
531
532 let mut n = 0;
533
534 if Self::METADATA_LENGTH > buf.len() {
536 return Err(Error::new(ErrorKind::InvalidData, "invalid byte slice"));
537 }
538
539 if buf[n..4] == Xor8::<H>::SIGNATURE_V1 {
541 return Self::from_bytes_v1(buf);
542 } else if buf[n..4] != Xor8::<H>::SIGNATURE_V2 {
543 return Err(Error::new(
544 ErrorKind::InvalidData,
545 "File signature incorrect",
546 ));
547 }
548
549 n += 4;
550 let seed = u64::from_be_bytes(buf[n..n + 8].try_into().unwrap());
552 n += 8;
553 let block_length = u32::from_be_bytes(buf[n..n + 4].try_into().unwrap());
555 n += 4;
556 let fp_len = u32::from_be_bytes(buf[n..n + 4].try_into().unwrap()) as usize;
558 n += 4;
559 let hb_len = u32::from_be_bytes(buf[n..n + 4].try_into().unwrap()) as usize;
561 n += 4;
562
563 if buf[n..].len() < (fp_len + hb_len) {
564 return Err(Error::new(ErrorKind::InvalidData, "invalid byte slice"));
565 }
566
567 let finger_prints = buf[n..n + fp_len].to_vec();
569 n += fp_len;
570 let hash_builder: H = buf[n..n + hb_len].to_vec().into();
572
573 Ok(Xor8 {
574 keys: None,
575 hash_builder,
576 seed,
577 block_length,
578 finger_prints,
579 })
580 }
581
582 fn from_bytes_v1(buf: Vec<u8>) -> io::Result<Self>
583 where
584 H: Default,
585 {
586 use std::io::Error;
587
588 if Self::METADATA_LENGTH > buf.len() {
590 return Err(Error::new(ErrorKind::InvalidData, "invalid byte slice"));
591 }
592 if buf[..4] != Xor8::<H>::SIGNATURE_V1 {
593 return Err(Error::new(
594 ErrorKind::InvalidData,
595 "File signature incorrect",
596 ));
597 }
598 let fp_len = u32::from_be_bytes(buf[16..20].try_into().unwrap()) as usize;
599 if buf[20..].len() < fp_len {
600 return Err(Error::new(ErrorKind::InvalidData, "invalid byte slice"));
601 }
602 Ok(Xor8 {
603 keys: None,
604 hash_builder: H::default(),
605 seed: u64::from_be_bytes(buf[4..12].try_into().unwrap()),
606 block_length: u32::from_be_bytes(buf[12..16].try_into().unwrap()),
607 finger_prints: buf[20..].to_vec(),
608 })
609 }
610}
611
612#[cfg(test)]
613#[path = "xor8_test.rs"]
614mod xor8_test;