[][src]Struct lsh_rs::LSH

pub struct LSH<T: HashTables, H: VecHash> {
    pub n_hash_tables: usize,
    pub n_projections: usize,
    pub hashers: Vec<H>,
    pub dim: usize,
    pub hash_tables: Option<T>,
    // some fields omitted
}

Wrapper for LSH functionality. Can be initialized following the Builder pattern.

Example

use lsh_rs::LshMem;
let n_projections = 9;
let n_hash_tables = 45;
let dim = 10;
let lsh = LshMem::new(n_projections, n_hash_tables, dim)
    .only_index()
    .seed(1)
    .srp();

Builder pattern methods

The following methods can be used to change internal state during object initialization:

Fields

n_hash_tables: usize

Number of hash tables. L in literature.

n_projections: usize

Number of hash functions. K in literature.

hashers: Vec<H>

Hash functions.

dim: usize

Dimensions of p and q

hash_tables: Option<T>

Storage data structure

Methods

impl<T: HashTables> LSH<T, SignRandomProjections>[src]

pub fn srp(&mut self) -> Result<Self>[src]

Create a new SignRandomProjections LSH

impl<T: HashTables> LSH<T, L2>[src]

pub fn l2(&mut self, r: f32) -> Result<Self>[src]

Create a new L2 LSH

See hash function: https://www.cs.princeton.edu/courses/archive/spring05/cos598E/bib/p253-datar.pdf in paragraph 3.2

h(v) = floor((a^T v + b) / r)

Arguments

  • r - Parameter of hash function.

impl<T: HashTables> LSH<T, MIPS>[src]

pub fn mips(&mut self, r: f32, U: f32, m: usize) -> Result<Self>[src]

Create a new MIPS LSH

Asymmetric hasher (ALSH)

See hash function: https://www.cs.rice.edu/~as143/Papers/SLIDE_MLSys.pdf

Arguments

  • r - Parameter of hash function.
  • U - Parameter of hash function.
  • m - Parameter of hash function.

impl<H: VecHash + Sync, T: HashTables + Sync> LSH<T, H>[src]

pub fn query_bucket_ids_batch_par(
    &self,
    vs: &[DataPoint]
) -> Result<Vec<Vec<u32>>>
[src]

Query bucket collision for a batch of data points in parallel.

Arguments

  • vs - Array of data points.

pub fn query_bucket_ids_batch_arr_par(
    &self,
    vs: ArrayView2<FloatSize>
) -> Result<Vec<Vec<u32>>>
[src]

Query bucket collision for a batch of data points in parallel.

Arguments

  • vs - Array of data points.

impl<H: VecHash + Sync, T: HashTables> LSH<T, H>[src]

pub fn store_vecs(&mut self, vs: &[DataPoint]) -> Result<Vec<u32>>[src]

Store multiple vectors in storage. Before storing, the storage capacity is possibly increased to match the data points.

Arguments

  • vs - Array of data points.

Examples

 use lsh_rs::LshSql;
 let mut lsh = LshSql::new(5, 10, 3).srp();
 let vs = &[&[2., 3., 4.],
            &[-1., -1., 1.]];
 let ids = lsh.store_vecs(vs);

pub fn store_array(&mut self, vs: ArrayView2<FloatSize>) -> Result<Vec<u32>>[src]

Store a 2D array in storage. Before storing, the storage capacity is possibly increased to match the data points.

Arguments

  • vs - Array of data points.

Examples

 use lsh_rs::LshSql;
 use ndarray::prelude::*;
 let mut lsh = LshSql::new(5, 10, 3).srp();
 let vs = array![[1., 2., 3.], [4., 5., 6.]];
 let ids = lsh.store_array(vs);

impl<H: VecHash, T: HashTables> LSH<T, H>[src]

pub fn new(n_projections: usize, n_hash_tables: usize, dim: usize) -> Self[src]

Create a new Base LSH

Arguments

  • n_projections - Hash length. Every projection creates a hashed integer.
  • n_hash_tables - Increases the chance of finding the closest but has a performance and space cost.
  • dim - Dimensions of the data points.

pub fn seed(&mut self, seed: u64) -> &mut Self[src]

Set seed of LSH

Arguments

  • seed - Seed for the RNGs. If 0, the RNGs are seeded randomly.

pub fn only_index(&mut self) -> &mut Self[src]

Only store indexes of data points. The mapping of data point to indexes is done outside of the LSH struct.

pub fn multi_probe(&mut self, budget: usize) -> &mut Self[src]

Enable multi-probing LSH and set multi-probing parameters.

Arguments

  • budget - The number of probes (close hashes) that will be executed per query.

pub fn base(&mut self) -> &mut Self[src]

pub fn increase_storage(&mut self, upper_bound: usize) -> Result<&mut Self>[src]

Increase storage of the hash_tables backend. This can reduce system calls.

Arguments

  • upper_bound - The maximum storage capacity required.

pub fn set_database_file(&mut self, path: &str) -> &mut Self[src]

Location where the database file should be written to or can be found. This only has an effect with the SqlTable backend.

Arguments

  • path - File path.

pub fn describe(&self) -> Result<String>[src]

Collects statistics of the buckets in the hash_tables.

Statistics

  • average bucket length
  • minimal bucket length
  • maximum bucket length
  • bucket length standard deviation

pub fn store_vec(&mut self, v: &DataPointSlice) -> Result<u32>[src]

Store a single vector in storage. Returns id.

Arguments

  • v - Data point.

Examples

use lsh_rs::LshSql;
let mut lsh = LshSql::new(5, 10, 3).srp();
let v = &[2., 3., 4.];
let id = lsh.store_vec(v);

pub fn update_by_idx(
    &mut self,
    idx: u32,
    new_v: &DataPointSlice,
    old_v: &DataPointSlice
) -> Result<()>
[src]

Update a data point in the hash_tables.

Arguments

  • idx - Id of the hash that needs to be updated.
  • new_v - New data point that needs to be hashed.
  • old_v - Old data point. Needed to remove the old hash.

pub fn query_bucket(&self, v: &DataPointSlice) -> Result<Vec<&DataPoint>>[src]

Query all buckets in the hash tables. The union of the matching buckets over the L hash tables is returned.

Arguments

  • v - Query vector

pub fn query_bucket_ids(&self, v: &DataPointSlice) -> Result<Vec<u32>>[src]

Query all buckets in the hash tables and return the data point indexes. The union of the matching buckets of L hash tables is returned.

Arguments

  • v - Query vector

pub fn query_bucket_ids_batch(&self, vs: &[DataPoint]) -> Result<Vec<Vec<u32>>>[src]

Query bucket collision for a batch of data points.

Arguments

  • vs - Array of data points.

pub fn query_bucket_ids_batch_arr(
    &self,
    vs: ArrayView2<FloatSize>
) -> Result<Vec<Vec<u32>>>
[src]

Query bucket collision for a batch of data points.

Arguments

  • vs - Array of data points.

pub fn delete_vec(&mut self, v: &DataPointSlice) -> Result<()>[src]

Delete data point from storage. This does not free memory as the storage vector isn't resized.

Arguments

  • v - Data point

impl<T: VecHash + Serialize> LSH<SqlTable, T>[src]

pub fn commit(&mut self) -> Result<()>[src]

Commit SqlTable backend

pub fn init_transaction(&mut self) -> Result<()>[src]

Init transaction of SqlTable backend.

impl<H> LSH<MemoryTable, H> where
    H: Serialize + DeserializeOwned + VecHash
[src]

pub fn load<P: AsRef<Path>>(&mut self, path: P) -> Result<()>[src]

Deserialize MemoryTable backend

pub fn dump<P: AsRef<Path>>(&self, path: P) -> Result<()>[src]

Serialize MemoryTable backend

impl<H: VecHash, T: HashTables> LSH<T, H>[src]

pub fn multi_probe_bucket_union(
    &self,
    v: &DataPointSlice
) -> Result<FnvHashSet<u32>>
[src]

Auto Trait Implementations

impl<T, H> RefUnwindSafe for LSH<T, H> where
    H: RefUnwindSafe,
    T: RefUnwindSafe

impl<T, H> Send for LSH<T, H> where
    H: Send,
    T: Send

impl<T, H> Sync for LSH<T, H> where
    H: Sync,
    T: Sync

impl<T, H> Unpin for LSH<T, H> where
    H: Unpin,
    T: Unpin

impl<T, H> UnwindSafe for LSH<T, H> where
    H: UnwindSafe,
    T: UnwindSafe

Blanket Implementations

impl<T> Any for T where
    T: 'static + ?Sized
[src]

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T> From<T> for T[src]

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

impl<V, T> VZip<V> for T where
    V: MultiLane<T>,