find_simdoc/lsh/
simhash.rs1use rand_xoshiro::rand_core::{RngCore, SeedableRng};
3
/// Seeded factory for SimHash-style 64-bit sketches.
///
/// The struct only stores the seed. Call [`SimHasher::iter`] with a weighted
/// feature slice to obtain the iterator that actually produces sketches.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SimHasher {
    /// Seed used to initialise the per-sketch seed generator in `iter`.
    seed: u64,
}
8
9impl SimHasher {
10 pub const fn new(seed: u64) -> Self {
12 Self { seed }
13 }
14
15 pub fn iter<'a>(&self, feature: &'a [(u64, f64)]) -> SimHashIter<'a> {
17 SimHashIter {
18 feature,
19 seeder: rand_xoshiro::SplitMix64::seed_from_u64(self.seed),
20 weights: [0.; 64],
21 }
22 }
23}
24
25pub struct SimHashIter<'a> {
27 feature: &'a [(u64, f64)],
28 seeder: rand_xoshiro::SplitMix64,
29 weights: [f64; 64],
30}
31
32impl<'a> Iterator for SimHashIter<'a> {
33 type Item = u64;
34
35 fn next(&mut self) -> Option<Self::Item> {
36 self.weights.fill(0.);
37 let seed = self.seeder.next_u64();
38 for (h, x) in self
39 .feature
40 .iter()
41 .map(|&(i, x)| (crate::lsh::hash_u64(i, seed), x))
42 {
43 for (j, w) in self.weights.iter_mut().enumerate() {
44 if (h >> j) & 1 == 0 {
45 *w += x;
46 } else {
47 *w -= x;
48 }
49 }
50 }
51 Some(
52 self.weights
53 .iter()
54 .fold(0, |acc, w| if *w >= 0. { (acc << 1) | 1 } else { acc << 1 }),
55 )
56 }
57}