find_simdoc/lsh/
minhash.rs1use rand_xoshiro::rand_core::{RngCore, SeedableRng};
3
4pub struct MinHasher {
6 seed: u64,
7}
8
9impl MinHasher {
10 pub const fn new(seed: u64) -> Self {
12 Self { seed }
13 }
14
15 pub fn iter<'a>(&self, feature: &'a [u64]) -> MinHashIter<'a> {
17 MinHashIter {
18 feature,
19 seeder: rand_xoshiro::SplitMix64::seed_from_u64(self.seed),
20 }
21 }
22}
23
24pub struct MinHashIter<'a> {
26 feature: &'a [u64],
27 seeder: rand_xoshiro::SplitMix64,
28}
29
30impl<'a> Iterator for MinHashIter<'a> {
31 type Item = u64;
32
33 fn next(&mut self) -> Option<Self::Item> {
34 let mut x = 0;
35 for _ in 0..64 {
36 let seed = self.seeder.next_u64();
37 let h = self
38 .feature
39 .iter()
40 .map(|&i| crate::lsh::hash_u64(i, seed))
41 .min()
42 .unwrap();
43 x = (x << 1) | (h & 1);
44 }
45 Some(x)
46 }
47}