Struct MinHash

Source
pub struct MinHash<Word, const PERMUTATIONS: usize> { /* private fields */ }

Implementations§

Source§

impl<Word: Maximal, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>

Source

pub fn new() -> Self

Create a new MinHash.

§Examples
use minhash_rs::prelude::*;

let mut minhash = MinHash::<u64, 128>::new();
Source§

impl<Word: Min + XorShift + Copy + Eq + Maximal + Zero, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>
where u64: Primitive<Word>,

Source

pub fn is_empty(&self) -> bool

Returns whether the MinHash is empty.

§Examples
use minhash_rs::prelude::*;

let mut minhash = MinHash::<u8, 16>::new();

assert!(minhash.is_empty());
minhash.insert_with_siphashes13(42);
assert!(!minhash.is_empty());
Source

pub fn is_full(&self) -> bool

Returns whether the MinHash is fully saturated.

§Examples
use minhash_rs::prelude::*;

let mut minhash = MinHash::<u8, 16>::new();

assert!(!minhash.is_full());

for i in 0..1024 {
   minhash.insert_with_siphashes13(i);
}

assert!(minhash.is_full());
Source§

impl<Word: Min + XorShift + Copy + Eq, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>
where Self: IterHashes<Word, PERMUTATIONS>, u64: Primitive<Word>,

Source

pub fn may_contain_value_with_siphashes13<H: Hash>(&self, value: H) -> bool

Returns whether the MinHash may contain the provided value, using the SipHasher13.

§Arguments
  • value - The value to check.
§Implementative details

The procedure estimates whether the provided value is contained in the current MinHash data structure by checking whether all of the words are smaller or equal to all of the hash values that are calculated using the provided value as seed.

§Examples
use minhash_rs::prelude::*;

let mut minhash = MinHash::<u64, 128>::new();

assert!(!minhash.may_contain_value_with_siphashes13(42));
minhash.insert_with_siphashes13(42);
assert!(minhash.may_contain_value_with_siphashes13(42));
minhash.insert_with_siphashes13(47);
assert!(minhash.may_contain_value_with_siphashes13(47));
Source

pub fn insert_with_siphashes13<H: Hash>(&mut self, value: H)

Insert a value into the MinHash using the SipHasher13.

§Arguments
  • value - The value to insert.
§Examples

In the following example we show how we can create a MinHash and insert a value in it.

use minhash_rs::prelude::*;

let mut minhash = MinHash::<u64, 128>::new();

assert!(!minhash.may_contain_value_with_siphashes13(42));
minhash.insert_with_siphashes13(42);
assert!(minhash.may_contain_value_with_siphashes13(42));
minhash.insert_with_siphashes13(47);
assert!(minhash.may_contain_value_with_siphashes13(47));
Source

pub fn may_contain_value_with_keyed_siphashes13<H: Hash>( &self, value: H, key0: u64, key1: u64, ) -> bool

Returns whether the MinHash may contain the provided value, using the keyed SipHasher13.

§Arguments
  • value - The value to check.
  • key0 - The first key.
  • key1 - The second key.
§Implementative details

The procedure estimates whether the provided value is contained in the current MinHash data structure by checking whether all of the words are smaller or equal to all of the hash values that are calculated using the provided value as seed.

§Examples
use minhash_rs::prelude::*;

let mut minhash = MinHash::<u64, 128>::new();
let key0 = 0x0123456789ABCDEF;
let key1 = 0xFEDCBA9876543210;

assert!(!minhash.may_contain_value_with_keyed_siphashes13(42, key0, key1));
minhash.insert_with_keyed_siphashes13(42, key0, key1);
assert!(minhash.may_contain_value_with_keyed_siphashes13(42, key0, key1));
minhash.insert_with_keyed_siphashes13(47, key0, key1);
assert!(minhash.may_contain_value_with_keyed_siphashes13(47, key0, key1));
Source

pub fn insert_with_keyed_siphashes13<H: Hash>( &mut self, value: H, key0: u64, key1: u64, )

Insert a value into the MinHash using the keyed SipHasher13.

§Arguments
  • value - The value to insert.
  • key0 - The first key.
  • key1 - The second key.
§Examples

In the following example we show how we can create a MinHash and insert a value in it.

use minhash_rs::prelude::*;

let mut minhash = MinHash::<u64, 128>::new();
let key0 = 0x0123456789ABCDEF;
let key1 = 0xFEDCBA9876543210;

assert!(!minhash.may_contain_value_with_keyed_siphashes13(42, key0, key1));
minhash.insert_with_keyed_siphashes13(42, key0, key1);
assert!(minhash.may_contain_value_with_keyed_siphashes13(42, key0, key1));
minhash.insert_with_keyed_siphashes13(47, key0, key1);
assert!(minhash.may_contain_value_with_keyed_siphashes13(47, key0, key1));
Source

pub fn may_contain_value_with_fvn<H: Hash>(&self, value: H) -> bool

Returns whether the MinHash may contain the provided value, using the FVN.

§Arguments
  • value - The value to check.
§Implementative details

The procedure estimates whether the provided value is contained in the current MinHash data structure by checking whether all of the words are smaller or equal to all of the hash values that are calculated using the provided value as seed.

§Examples
use minhash_rs::prelude::*;

let mut minhash = MinHash::<u64, 128>::new();

assert!(!minhash.may_contain_value_with_fvn(42));
minhash.insert_with_fvn(42);
assert!(minhash.may_contain_value_with_fvn(42));
minhash.insert_with_fvn(47);
assert!(minhash.may_contain_value_with_fvn(47));
Source

pub fn insert_with_fvn<H: Hash>(&mut self, value: H)

Insert a value into the MinHash using the FVN.

§Arguments
  • value - The value to insert.
§Examples

In the following example we show how we can create a MinHash and insert a value in it.

use minhash_rs::prelude::*;

let mut minhash = MinHash::<u64, 128>::new();

assert!(!minhash.may_contain_value_with_fvn(42));
minhash.insert_with_fvn(42);
assert!(minhash.may_contain_value_with_fvn(42));
minhash.insert_with_fvn(47);
assert!(minhash.may_contain_value_with_fvn(47));
Source

pub fn may_contain_value_with_keyed_fvn<H: Hash>( &self, value: H, key: u64, ) -> bool

Returns whether the MinHash may contain the provided value, using the keyed FVN.

§Arguments
  • value - The value to check.
  • key - The first key.
§Implementative details

The procedure estimates whether the provided value is contained in the current MinHash data structure by checking whether all of the words are smaller or equal to all of the hash values that are calculated using the provided value as seed.

§Examples
use minhash_rs::prelude::*;

let mut minhash = MinHash::<u64, 128>::new();
let key = 0x0123456789ABCDEF;

assert!(!minhash.may_contain_value_with_keyed_fvn(42, key));
minhash.insert_with_keyed_fvn(42, key);
assert!(minhash.may_contain_value_with_keyed_fvn(42, key));
minhash.insert_with_keyed_fvn(47, key);
assert!(minhash.may_contain_value_with_keyed_fvn(47, key));
Source

pub fn insert_with_keyed_fvn<H: Hash>(&mut self, value: H, key: u64)

Insert a value into the MinHash using the keyed FVN.

§Arguments
  • value - The value to insert.
  • key - The first key.
§Examples

In the following example we show how we can create a MinHash and insert a value in it.

use minhash_rs::prelude::*;

let mut minhash = MinHash::<u64, 128>::new();
let key = 0x0123456789ABCDEF;

assert!(!minhash.may_contain_value_with_keyed_fvn(42, key));
minhash.insert_with_keyed_fvn(42, key);
assert!(minhash.may_contain_value_with_keyed_fvn(42, key));
minhash.insert_with_keyed_fvn(47, key);
assert!(minhash.may_contain_value_with_keyed_fvn(47, key));
Source§

impl<Word, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>

Source

pub fn iter(&self) -> impl Iterator<Item = &Word>

Iterate over the words.

Source

pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut Word>

Iterate over the words mutably.

Source

pub fn number_of_permutations(&self) -> usize

Returns the number of permutations.

§Examples
use minhash_rs::prelude::*;

let minhash = MinHash::<u64, 128>::new();

assert_eq!(minhash.number_of_permutations(), 128);
Source

pub fn memory(&self) -> usize

Returns memory required to store the MinHash in bits.

§Examples

For a MinHash with 128 permutations and 64 bit words, the memory required is 128 * 64 * 8.

use minhash_rs::prelude::*;

let minhash = MinHash::<u64, 128>::new();

assert_eq!(minhash.memory(), 128 * 64);

For a MinHash with 128 permutations and 32 bit words, the memory required is 128 * 32 * 8.

use minhash_rs::prelude::*;

let minhash = MinHash::<u32, 128>::new();

assert_eq!(minhash.memory(), 128 * 32);
Source§

impl<Word: Eq, const PERMUTATIONS: usize> MinHash<Word, PERMUTATIONS>

Source

pub fn estimate_jaccard_index(&self, other: &Self) -> f64

Calculate the similarity between two MinHashes.

§Arguments
  • other - The other MinHash to compare to.
§Examples
use std::collections::HashSet;
use minhash_rs::prelude::*;

let first_set: HashSet<u64> = [1_u64, 2_u64, 3_u64, 4_u64, 5_u64, 6_u64, 7_u64, 8_u64].iter().copied().collect();
let second_set: HashSet<u64> = [5_u64, 6_u64, 7_u64, 8_u64, 9_u64, 10_u64, 11_u64, 12_u64].iter().copied().collect();

let mut first_minhash: MinHash<u64, 128> = first_set.iter().collect();
let mut second_minhash: MinHash<u64, 128> = second_set.iter().collect();

let approximation = first_minhash.estimate_jaccard_index(&second_minhash);
let ground_truth = first_set.intersection(&second_set).count() as f64 / first_set.union(&second_set).count() as f64;

assert!((approximation - ground_truth).abs() < 0.01, concat!(
    "We expected the approximation to be close to the ground truth, ",
   "but got an error of {} instead. The ground truth is {} and the approximation is {}."
   ), (approximation - ground_truth).abs(), ground_truth, approximation
);

Trait Implementations§

Source§

impl<Word, const PERMUTATIONS: usize> AsMut<[Word]> for MinHash<Word, PERMUTATIONS>

Source§

fn as_mut(&mut self) -> &mut [Word]

Converts this type into a mutable reference of the (usually inferred) input type.
Source§

impl<Word, const PERMUTATIONS: usize> AsRef<[Word]> for MinHash<Word, PERMUTATIONS>

We also implement AsRef and AsMut for direct access on the MinHash words.

Source§

fn as_ref(&self) -> &[Word]

Converts this type into a shared reference of the (usually inferred) input type.
Source§

impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicU16, PERMUTATIONS> for MinHash<u16, PERMUTATIONS>

Source§

fn iter_atomic<'a>(&'a self) -> impl Iterator<Item = &'a AtomicU16>
where Self: 'a,

Iterate over the words.

Source§

fn fetch_insert_with_siphashes13<H: Hash>(&self, value: H, ordering: Ordering)

Insert a value into the MinHash atomically, with SipHasher13. Read more
Source§

fn fetch_insert_with_keyed_siphashes13<H: Hash>( &self, value: H, key0: u64, key1: u64, ordering: Ordering, )

Insert a value into the MinHash atomically, with keyed SipHasher13. Read more
Source§

impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicU32, PERMUTATIONS> for MinHash<u32, PERMUTATIONS>

Source§

fn iter_atomic<'a>(&'a self) -> impl Iterator<Item = &'a AtomicU32>
where Self: 'a,

Iterate over the words.

Source§

fn fetch_insert_with_siphashes13<H: Hash>(&self, value: H, ordering: Ordering)

Insert a value into the MinHash atomically, with SipHasher13. Read more
Source§

fn fetch_insert_with_keyed_siphashes13<H: Hash>( &self, value: H, key0: u64, key1: u64, ordering: Ordering, )

Insert a value into the MinHash atomically, with keyed SipHasher13. Read more
Source§

impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicU64, PERMUTATIONS> for MinHash<u64, PERMUTATIONS>

Source§

fn iter_atomic<'a>(&'a self) -> impl Iterator<Item = &'a AtomicU64>
where Self: 'a,

Iterate over the words.

Source§

fn fetch_insert_with_siphashes13<H: Hash>(&self, value: H, ordering: Ordering)

Insert a value into the MinHash atomically, with SipHasher13. Read more
Source§

fn fetch_insert_with_keyed_siphashes13<H: Hash>( &self, value: H, key0: u64, key1: u64, ordering: Ordering, )

Insert a value into the MinHash atomically, with keyed SipHasher13. Read more
Source§

impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicU8, PERMUTATIONS> for MinHash<u8, PERMUTATIONS>

Source§

fn iter_atomic<'a>(&'a self) -> impl Iterator<Item = &'a AtomicU8>
where Self: 'a,

Iterate over the words.

Source§

fn fetch_insert_with_siphashes13<H: Hash>(&self, value: H, ordering: Ordering)

Insert a value into the MinHash atomically, with SipHasher13. Read more
Source§

fn fetch_insert_with_keyed_siphashes13<H: Hash>( &self, value: H, key0: u64, key1: u64, ordering: Ordering, )

Insert a value into the MinHash atomically, with keyed SipHasher13. Read more
Source§

impl<const PERMUTATIONS: usize> AtomicMinHash<AtomicUsize, PERMUTATIONS> for MinHash<usize, PERMUTATIONS>

Source§

fn iter_atomic<'a>(&'a self) -> impl Iterator<Item = &'a AtomicUsize>
where Self: 'a,

Iterate over the words.

Source§

fn fetch_insert_with_siphashes13<H: Hash>(&self, value: H, ordering: Ordering)

Insert a value into the MinHash atomically, with SipHasher13. Read more
Source§

fn fetch_insert_with_keyed_siphashes13<H: Hash>( &self, value: H, key0: u64, key1: u64, ordering: Ordering, )

Insert a value into the MinHash atomically, with keyed SipHasher13. Read more
Source§

impl<Word: Min + Clone + Eq, const PERMUTATATIONS: usize> BitAnd<&MinHash<Word, PERMUTATATIONS>> for MinHash<Word, PERMUTATATIONS>

Source§

type Output = MinHash<Word, PERMUTATATIONS>

The resulting type after applying the & operator.
Source§

fn bitand(self, rhs: &Self) -> Self::Output

Performs the & operation. Read more
Source§

impl<Word: Min + Clone + Eq, const PERMUTATATIONS: usize> BitAnd for MinHash<Word, PERMUTATATIONS>

Source§

type Output = MinHash<Word, PERMUTATATIONS>

The resulting type after applying the & operator.
Source§

fn bitand(self, rhs: Self) -> Self::Output

Performs the & operation. Read more
Source§

impl<Word: Min + Clone + Eq, const PERMUTATATIONS: usize> BitAndAssign<&MinHash<Word, PERMUTATATIONS>> for MinHash<Word, PERMUTATATIONS>

Source§

fn bitand_assign(&mut self, rhs: &Self)

Performs the &= operation. Read more
Source§

impl<Word: Min + Clone + Eq, const PERMUTATATIONS: usize> BitAndAssign for MinHash<Word, PERMUTATATIONS>

Source§

fn bitand_assign(&mut self, rhs: Self)

Performs the &= operation. Read more
Source§

impl<Word: Clone, const PERMUTATIONS: usize> Clone for MinHash<Word, PERMUTATIONS>

Source§

fn clone(&self) -> MinHash<Word, PERMUTATIONS>

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl<Word: Debug, const PERMUTATIONS: usize> Debug for MinHash<Word, PERMUTATIONS>

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl<Word: Maximal, const PERMUTATIONS: usize> Default for MinHash<Word, PERMUTATIONS>

Source§

fn default() -> Self

Create a new MinHash with the maximal value.

§Examples
use minhash_rs::prelude::*;

let mut minhash = MinHash::<u64, 128>::default();

assert_eq!(minhash, MinHash::<u64, 128>::new());
Source§

impl<Word: Min + Clone + Eq + Maximal + XorShift, A: Hash, const PERMUTATATIONS: usize> FromIterator<A> for MinHash<Word, PERMUTATATIONS>
where u64: Primitive<Word>,

Source§

fn from_iter<T: IntoIterator<Item = A>>(iter: T) -> Self

Creates a new MinHash and adds all elements from an iterator to it.

§Examples
use minhash_rs::prelude::*;

let data = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
let minhash = MinHash::<u64, 128>::from_iter(data.clone());

for item in data {
    assert!(minhash.may_contain_value_with_siphashes13(item));
}
Source§

impl<Word: Hash, const PERMUTATIONS: usize> Hash for MinHash<Word, PERMUTATIONS>

Source§

fn hash<__H: Hasher>(&self, state: &mut __H)

Feeds this value into the given Hasher. Read more
1.3.0 · Source§

fn hash_slice<H>(data: &[Self], state: &mut H)
where H: Hasher, Self: Sized,

Feeds a slice of this type into the given Hasher. Read more
Source§

impl<W: Maximal, const PERMUTATIONS: usize> Index<usize> for MinHash<W, PERMUTATIONS>

We also provide indexing for the MinHash.

Source§

type Output = W

The returned type after indexing.
Source§

fn index(&self, index: usize) -> &Self::Output

Performs the indexing (container[index]) operation. Read more
Source§

impl<W: Maximal, const PERMUTATIONS: usize> IndexMut<usize> for MinHash<W, PERMUTATIONS>

Source§

fn index_mut(&mut self, index: usize) -> &mut Self::Output

Performs the mutable indexing (container[index]) operation. Read more
Source§

impl<Word: Min + XorShift + Copy + Eq, const PERMUTATIONS: usize> IterHashes<Word, PERMUTATIONS> for MinHash<Word, PERMUTATIONS>
where u64: Primitive<Word>,

Source§

fn iter_hashes_from_value<H: Hash, HS: Hasher>( value: H, hasher: HS, ) -> impl Iterator<Item = Word>

Iterate on the hashes from the provided value and hasher. Read more
Source§

fn iter_siphashes13_from_value<H: Hash>(value: H) -> impl Iterator<Item = Word>

Iterate on the SipHasher13 hashes from the provided value. Read more
Source§

fn iter_keyed_siphashes13_from_value<H: Hash>( value: H, key0: u64, key1: u64, ) -> impl Iterator<Item = Word>

Iterate on the keyed SipHasher13 hashes from the provided value. Read more
Source§

fn iter_fvn_from_value<H: Hash>(value: H) -> impl Iterator<Item = Word>

Iterate on the FVN hashes from the provided value. Read more
Source§

fn iter_keyed_fvn_from_value<H: Hash>( value: H, key: u64, ) -> impl Iterator<Item = Word>

Iterate on the keyed SipHasher13 hashes from the provided value. Read more
Source§

impl<Word: PartialEq, const PERMUTATIONS: usize> PartialEq for MinHash<Word, PERMUTATIONS>

Source§

fn eq(&self, other: &MinHash<Word, PERMUTATIONS>) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl<Word: Copy, const PERMUTATIONS: usize> Copy for MinHash<Word, PERMUTATIONS>

Source§

impl<Word: Eq, const PERMUTATIONS: usize> Eq for MinHash<Word, PERMUTATIONS>

Source§

impl<Word, const PERMUTATIONS: usize> StructuralPartialEq for MinHash<Word, PERMUTATIONS>

Auto Trait Implementations§

§

impl<Word, const PERMUTATIONS: usize> Freeze for MinHash<Word, PERMUTATIONS>
where Word: Freeze,

§

impl<Word, const PERMUTATIONS: usize> RefUnwindSafe for MinHash<Word, PERMUTATIONS>
where Word: RefUnwindSafe,

§

impl<Word, const PERMUTATIONS: usize> Send for MinHash<Word, PERMUTATIONS>
where Word: Send,

§

impl<Word, const PERMUTATIONS: usize> Sync for MinHash<Word, PERMUTATIONS>
where Word: Sync,

§

impl<Word, const PERMUTATIONS: usize> Unpin for MinHash<Word, PERMUTATIONS>
where Word: Unpin,

§

impl<Word, const PERMUTATIONS: usize> UnwindSafe for MinHash<Word, PERMUTATIONS>
where Word: UnwindSafe,

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.