pub struct MinHash<Word, const PERMUTATIONS: usize> { /* private fields */ }
Implementations§
Source§impl<Word: Min + XorShift + Copy + Eq + Maximal + Zero, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>
impl<Word: Min + XorShift + Copy + Eq + Maximal + Zero, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>
Source§impl<Word: Min + XorShift + Copy + Eq, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>
impl<Word: Min + XorShift + Copy + Eq, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>
Sourcepub fn may_contain_value_with_siphashes13<H: Hash>(&self, value: H) -> bool
pub fn may_contain_value_with_siphashes13<H: Hash>(&self, value: H) -> bool
Returns whether the MinHash may contain the provided value, using the SipHasher13.
§Arguments
value
- The value to check.
§Implementation details
The procedure estimates whether the provided value is contained in the current MinHash data structure by checking whether all of the words are smaller than or equal to the hash values that are calculated using the provided value as seed.
§Examples
use minhash_rs::prelude::*;
let mut minhash = MinHash::<u64, 128>::new();
assert!(!minhash.may_contain_value_with_siphashes13(42));
minhash.insert_with_siphashes13(42);
assert!(minhash.may_contain_value_with_siphashes13(42));
minhash.insert_with_siphashes13(47);
assert!(minhash.may_contain_value_with_siphashes13(47));
Sourcepub fn insert_with_siphashes13<H: Hash>(&mut self, value: H)
pub fn insert_with_siphashes13<H: Hash>(&mut self, value: H)
Insert a value into the MinHash using the SipHasher13.
§Arguments
value
- The value to insert.
§Examples
In the following example we show how we can create a MinHash and insert a value in it.
use minhash_rs::prelude::*;
let mut minhash = MinHash::<u64, 128>::new();
assert!(!minhash.may_contain_value_with_siphashes13(42));
minhash.insert_with_siphashes13(42);
assert!(minhash.may_contain_value_with_siphashes13(42));
minhash.insert_with_siphashes13(47);
assert!(minhash.may_contain_value_with_siphashes13(47));
Sourcepub fn may_contain_value_with_keyed_siphashes13<H: Hash>(
&self,
value: H,
key0: u64,
key1: u64,
) -> bool
pub fn may_contain_value_with_keyed_siphashes13<H: Hash>( &self, value: H, key0: u64, key1: u64, ) -> bool
Returns whether the MinHash may contain the provided value, using the keyed SipHasher13.
§Arguments
value
- The value to check.
key0
- The first key.
key1
- The second key.
§Implementation details
The procedure estimates whether the provided value is contained in the current MinHash data structure by checking whether all of the words are smaller than or equal to the hash values that are calculated using the provided value as seed.
§Examples
use minhash_rs::prelude::*;
let mut minhash = MinHash::<u64, 128>::new();
let key0 = 0x0123456789ABCDEF;
let key1 = 0xFEDCBA9876543210;
assert!(!minhash.may_contain_value_with_keyed_siphashes13(42, key0, key1));
minhash.insert_with_keyed_siphashes13(42, key0, key1);
assert!(minhash.may_contain_value_with_keyed_siphashes13(42, key0, key1));
minhash.insert_with_keyed_siphashes13(47, key0, key1);
assert!(minhash.may_contain_value_with_keyed_siphashes13(47, key0, key1));
Sourcepub fn insert_with_keyed_siphashes13<H: Hash>(
&mut self,
value: H,
key0: u64,
key1: u64,
)
pub fn insert_with_keyed_siphashes13<H: Hash>( &mut self, value: H, key0: u64, key1: u64, )
Insert a value into the MinHash using the keyed SipHasher13.
§Arguments
value
- The value to insert.
key0
- The first key.
key1
- The second key.
§Examples
In the following example we show how we can create a MinHash and insert a value in it.
use minhash_rs::prelude::*;
let mut minhash = MinHash::<u64, 128>::new();
let key0 = 0x0123456789ABCDEF;
let key1 = 0xFEDCBA9876543210;
assert!(!minhash.may_contain_value_with_keyed_siphashes13(42, key0, key1));
minhash.insert_with_keyed_siphashes13(42, key0, key1);
assert!(minhash.may_contain_value_with_keyed_siphashes13(42, key0, key1));
minhash.insert_with_keyed_siphashes13(47, key0, key1);
assert!(minhash.may_contain_value_with_keyed_siphashes13(47, key0, key1));
Sourcepub fn may_contain_value_with_fvn<H: Hash>(&self, value: H) -> bool
pub fn may_contain_value_with_fvn<H: Hash>(&self, value: H) -> bool
Returns whether the MinHash may contain the provided value, using the FVN.
§Arguments
value
- The value to check.
§Implementation details
The procedure estimates whether the provided value is contained in the current MinHash data structure by checking whether all of the words are smaller than or equal to the hash values that are calculated using the provided value as seed.
§Examples
use minhash_rs::prelude::*;
let mut minhash = MinHash::<u64, 128>::new();
assert!(!minhash.may_contain_value_with_fvn(42));
minhash.insert_with_fvn(42);
assert!(minhash.may_contain_value_with_fvn(42));
minhash.insert_with_fvn(47);
assert!(minhash.may_contain_value_with_fvn(47));
Sourcepub fn insert_with_fvn<H: Hash>(&mut self, value: H)
pub fn insert_with_fvn<H: Hash>(&mut self, value: H)
Insert a value into the MinHash using the FVN.
§Arguments
value
- The value to insert.
§Examples
In the following example we show how we can create a MinHash and insert a value in it.
use minhash_rs::prelude::*;
let mut minhash = MinHash::<u64, 128>::new();
assert!(!minhash.may_contain_value_with_fvn(42));
minhash.insert_with_fvn(42);
assert!(minhash.may_contain_value_with_fvn(42));
minhash.insert_with_fvn(47);
assert!(minhash.may_contain_value_with_fvn(47));
Sourcepub fn may_contain_value_with_keyed_fvn<H: Hash>(
&self,
value: H,
key: u64,
) -> bool
pub fn may_contain_value_with_keyed_fvn<H: Hash>( &self, value: H, key: u64, ) -> bool
Returns whether the MinHash may contain the provided value, using the keyed FVN.
§Arguments
value
- The value to check.
key
- The key.
§Implementation details
The procedure estimates whether the provided value is contained in the current MinHash data structure by checking whether all of the words are smaller than or equal to the hash values that are calculated using the provided value as seed.
§Examples
use minhash_rs::prelude::*;
let mut minhash = MinHash::<u64, 128>::new();
let key = 0x0123456789ABCDEF;
assert!(!minhash.may_contain_value_with_keyed_fvn(42, key));
minhash.insert_with_keyed_fvn(42, key);
assert!(minhash.may_contain_value_with_keyed_fvn(42, key));
minhash.insert_with_keyed_fvn(47, key);
assert!(minhash.may_contain_value_with_keyed_fvn(47, key));
Sourcepub fn insert_with_keyed_fvn<H: Hash>(&mut self, value: H, key: u64)
pub fn insert_with_keyed_fvn<H: Hash>(&mut self, value: H, key: u64)
Insert a value into the MinHash using the keyed FVN.
§Arguments
value
- The value to insert.
key
- The key.
§Examples
In the following example we show how we can create a MinHash and insert a value in it.
use minhash_rs::prelude::*;
let mut minhash = MinHash::<u64, 128>::new();
let key = 0x0123456789ABCDEF;
assert!(!minhash.may_contain_value_with_keyed_fvn(42, key));
minhash.insert_with_keyed_fvn(42, key);
assert!(minhash.may_contain_value_with_keyed_fvn(42, key));
minhash.insert_with_keyed_fvn(47, key);
assert!(minhash.may_contain_value_with_keyed_fvn(47, key));
Source§impl<Word, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>
impl<Word, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>
Sourcepub fn number_of_permutations(&self) -> usize
pub fn number_of_permutations(&self) -> usize
Returns the number of permutations.
§Examples
use minhash_rs::prelude::*;
let minhash = MinHash::<u64, 128>::new();
assert_eq!(minhash.number_of_permutations(), 128);
Sourcepub fn memory(&self) -> usize
pub fn memory(&self) -> usize
Returns the memory required to store the MinHash, in bits.
§Examples
For a MinHash with 128 permutations and 64-bit words, the memory required is 128 * 64 bits.
use minhash_rs::prelude::*;
let minhash = MinHash::<u64, 128>::new();
assert_eq!(minhash.memory(), 128 * 64);
For a MinHash with 128 permutations and 32-bit words, the memory required is 128 * 32 bits.
use minhash_rs::prelude::*;
let minhash = MinHash::<u32, 128>::new();
assert_eq!(minhash.memory(), 128 * 32);
Source§impl<Word: Eq, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>
impl<Word: Eq, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>
Sourcepub fn estimate_jaccard_index(&self, other: &Self) -> f64
pub fn estimate_jaccard_index(&self, other: &Self) -> f64
Estimates the Jaccard index (similarity) between two MinHashes.
§Arguments
other
- The other MinHash to compare to.
§Examples
use std::collections::HashSet;
use minhash_rs::prelude::*;
let first_set: HashSet<u64> = [1_u64, 2_u64, 3_u64, 4_u64, 5_u64, 6_u64, 7_u64, 8_u64].iter().copied().collect();
let second_set: HashSet<u64> = [5_u64, 6_u64, 7_u64, 8_u64, 9_u64, 10_u64, 11_u64, 12_u64].iter().copied().collect();
let mut first_minhash: MinHash<u64, 128> = first_set.iter().collect();
let mut second_minhash: MinHash<u64, 128> = second_set.iter().collect();
let approximation = first_minhash.estimate_jaccard_index(&second_minhash);
let ground_truth = first_set.intersection(&second_set).count() as f64 / first_set.union(&second_set).count() as f64;
assert!((approximation - ground_truth).abs() < 0.01, concat!(
"We expected the approximation to be close to the ground truth, ",
"but got an error of {} instead. The ground truth is {} and the approximation is {}."
), (approximation - ground_truth).abs(), ground_truth, approximation
);
Trait Implementations§
Source§impl<Word, const PERMUTATIONS: usize> AsRef<[Word]> for MinHash<Word, PERMUTATIONS>
We also implement AsRef and AsMut for direct access to the MinHash words.
impl<Word, const PERMUTATIONS: usize> AsRef<[Word]> for MinHash<Word, PERMUTATIONS>
We also implement AsRef and AsMut for direct access to the MinHash words.
Source§impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicU16, PERMUTATIONS> for MinHash<u16, PERMUTATIONS>
impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicU16, PERMUTATIONS> for MinHash<u16, PERMUTATIONS>
Source§impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicU32, PERMUTATIONS> for MinHash<u32, PERMUTATIONS>
impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicU32, PERMUTATIONS> for MinHash<u32, PERMUTATIONS>
Source§impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicU64, PERMUTATIONS> for MinHash<u64, PERMUTATIONS>
impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicU64, PERMUTATIONS> for MinHash<u64, PERMUTATIONS>
Source§impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicU8, PERMUTATIONS> for MinHash<u8, PERMUTATIONS>
impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicU8, PERMUTATIONS> for MinHash<u8, PERMUTATIONS>
Source§impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicUsize, PERMUTATIONS> for MinHash<usize, PERMUTATIONS>
impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicUsize, PERMUTATIONS> for MinHash<usize, PERMUTATIONS>
Source§fn iter_atomic<'a>(&'a self) -> impl Iterator<Item = &'a AtomicUsize>where
Self: 'a,
fn iter_atomic<'a>(&'a self) -> impl Iterator<Item = &'a AtomicUsize>where
Self: 'a,
Iterate over the words.
Source§impl<Word: Min + Clone + Eq, const PERMUTATATIONS: usize> BitAnd<&MinHash<Word, PERMUTATATIONS>> for MinHash<Word, PERMUTATATIONS>
impl<Word: Min + Clone + Eq, const PERMUTATATIONS: usize> BitAnd<&MinHash<Word, PERMUTATATIONS>> for MinHash<Word, PERMUTATATIONS>
Source§impl<Word: Min + Clone + Eq, const PERMUTATATIONS: usize> BitAnd for MinHash<Word, PERMUTATATIONS>
impl<Word: Min + Clone + Eq, const PERMUTATATIONS: usize> BitAnd for MinHash<Word, PERMUTATATIONS>
Source§impl<Word: Min + Clone + Eq, const PERMUTATATIONS: usize> BitAndAssign<&MinHash<Word, PERMUTATATIONS>> for MinHash<Word, PERMUTATATIONS>
impl<Word: Min + Clone + Eq, const PERMUTATATIONS: usize> BitAndAssign<&MinHash<Word, PERMUTATATIONS>> for MinHash<Word, PERMUTATATIONS>
Source§fn bitand_assign(&mut self, rhs: &Self)
fn bitand_assign(&mut self, rhs: &Self)
Performs the &= operation. Read more
Source§impl<Word: Min + Clone + Eq, const PERMUTATATIONS: usize> BitAndAssign for MinHash<Word, PERMUTATATIONS>
impl<Word: Min + Clone + Eq, const PERMUTATATIONS: usize> BitAndAssign for MinHash<Word, PERMUTATATIONS>
Source§fn bitand_assign(&mut self, rhs: Self)
fn bitand_assign(&mut self, rhs: Self)
Performs the &= operation. Read more
Source§impl<Word: Min + Clone + Eq + Maximal + XorShift, A: Hash, const PERMUTATATIONS: usize> FromIterator<A> for MinHash<Word, PERMUTATATIONS>
impl<Word: Min + Clone + Eq + Maximal + XorShift, A: Hash, const PERMUTATATIONS: usize> FromIterator<A> for MinHash<Word, PERMUTATATIONS>
Source§fn from_iter<T: IntoIterator<Item = A>>(iter: T) -> Self
fn from_iter<T: IntoIterator<Item = A>>(iter: T) -> Self
Creates a new MinHash and adds all elements from an iterator to it.
§Examples
use minhash_rs::prelude::*;
let data = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
let minhash = MinHash::<u64, 128>::from_iter(data.clone());
for item in data {
assert!(minhash.may_contain_value_with_siphashes13(item));
}
Source§impl<W: Maximal, const PERMUTATIONS: usize> Index<usize> for MinHash<W, PERMUTATIONS>
We also provide indexing for the MinHash.
impl<W: Maximal, const PERMUTATIONS: usize> Index<usize> for MinHash<W, PERMUTATIONS>
We also provide indexing for the MinHash.