find_simdoc/
lsh.rs

//! Locality-sensitive hashing (LSH) schemes.
2pub mod minhash;
3pub mod simhash;
4
5use std::hash::Hash;
6
7use hashbrown::HashSet;
8use rand_xoshiro::rand_core::{RngCore, SeedableRng};
9
10/// Generates a hash value.
11#[inline(always)]
12pub(crate) fn hash_u64(x: u64, seed: u64) -> u64 {
13    rand_xoshiro::SplitMix64::seed_from_u64(x ^ seed).next_u64()
14}
15
/// Computes the Jaccard distance between two collections treated as sets.
///
/// Both inputs are deduplicated into sets `A` and `B`, and the result is
/// `1 - |A ∩ B| / |A ∪ B|`: `0.0` for identical sets and `1.0` for
/// disjoint (non-empty) sets. Duplicate items and item order in the inputs
/// do not affect the result.
///
/// When both inputs are empty, the union is empty and the ratio is
/// undefined; this function returns `0.0` in that case (two empty sets are
/// identical) instead of NaN.
///
/// # Examples
///
/// ```
/// use find_simdoc::lsh::jaccard_distance;
///
/// let x = vec![1, 2, 4];
/// let y = vec![1, 2, 5, 7];
/// assert_eq!(jaccard_distance(x, y), 0.6);
///
/// let e: Vec<i32> = vec![];
/// assert_eq!(jaccard_distance(e.clone(), e), 0.0);
/// ```
pub fn jaccard_distance<I, T>(lhs: I, rhs: I) -> f64
where
    I: IntoIterator<Item = T>,
    T: Hash + Eq,
{
    let a: HashSet<T> = lhs.into_iter().collect();
    let b: HashSet<T> = rhs.into_iter().collect();

    let num_shared = a.intersection(&b).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|, which avoids a second pass over both sets.
    let num_total = a.len() + b.len() - num_shared;

    // Both sets empty: avoid 0/0 (NaN) and report them as identical.
    if num_total == 0 {
        return 0.0;
    }
    1. - (num_shared as f64) / (num_total as f64)
}