use crate::{dist::l2_norm, hash::HashPrimitive, DataPoint, HashTables, LshMem, Result, VecHash};
use fnv::FnvHashSet;
use ndarray::aview1;
use rayon::prelude::*;
use statrs::{
consts::SQRT_2PI,
distribution::{Normal, Univariate},
};
use std::f64::consts::PI;
use std::time::Instant;
pub fn l2_ph(r: f64, c: f64) -> f64 {
let norm = Normal::new(0., 1.).unwrap();
1. - 2. * norm.cdf(-r / c)
- 2. / (SQRT_2PI * r / c) * (1. - (-(r.powf(2.) / (2. * c.powf(2.)))).exp())
}
pub fn srp_ph(cosine_sim: f64) -> f64 {
1. - cosine_sim.acos() / PI
}
pub fn estimate_l(delta: f64, p1: f64, k: usize) -> usize {
(delta.ln() / (1. - p1.powf(k as f64)).ln()).round() as usize
}
#[derive(Debug)]
pub struct OptRes {
pub k: usize,
pub l: usize,
pub search_time: f64,
pub hash_time: f64,
pub min_len: usize,
pub max_len: usize,
pub avg_len: f32,
pub unique_hash_values: FnvHashSet<HashPrimitive>,
}
fn lsh_to_result<T: 'static + VecHash + Send + Sync + Clone>(
lsh: LshMem<T>,
vs: &[DataPoint],
k: usize,
l: usize,
) -> Result<OptRes> {
let mut lsh = lsh;
lsh.store_vecs(vs)?;
let mut search_time = 0.;
let mut hash_time = 0.;
let mut bucket_lengths = Vec::with_capacity(vs.len());
for v in vs {
let th = Instant::now();
let mut bucket_ids = lsh.query_bucket_ids(v)?;
hash_time += th.elapsed().as_secs_f64();
bucket_lengths.push(bucket_ids.len());
let t0 = Instant::now();
let q = aview1(&v);
bucket_ids.par_sort_by_key(|&idx| {
let p = &vs[idx as usize];
let dist = &aview1(&p) - &q;
let l2 = l2_norm(dist.as_slice().unwrap());
(l2 * 1e5) as i32
});
let duration = t0.elapsed();
search_time += duration.as_secs_f64();
}
let min = *bucket_lengths.iter().min().unwrap_or(&(0 as usize));
let max = *bucket_lengths.iter().max().unwrap_or(&(0 as usize));
let avg = bucket_lengths.iter().sum::<usize>() as f32 / bucket_lengths.len() as f32;
let unique_hash_values = lsh.hash_tables.unwrap().get_unique_hash_int();
Ok(OptRes {
k,
l,
search_time,
hash_time,
min_len: min,
max_len: max,
avg_len: avg,
unique_hash_values,
})
}
pub fn optimize_srp_params(
delta: f64,
cosine_sim: f64,
dim: usize,
k: &[usize],
vs: &[DataPoint],
) -> Result<Vec<OptRes>> {
let mut params = vec![];
let p1 = srp_ph(cosine_sim);
for _k in k {
let l = estimate_l(delta, p1, *_k);
params.push((*_k, l))
}
let result = params
.par_iter()
.map(|&(k, l)| {
let lsh = LshMem::new(k, l, dim).srp()?;
lsh_to_result(lsh, vs, k, l)
})
.collect();
result
}
pub fn optimize_l2_params(
delta: f64,
dim: usize,
k: &[usize],
vs: &[DataPoint],
) -> Result<Vec<OptRes>> {
let mut params = vec![];
let r = 4.0;
let p1 = l2_ph(r as f64, 1.);
for _k in k {
let l = estimate_l(delta, p1, *_k as usize);
params.push((r, *_k, l))
}
let result = params
.par_iter()
.map(|&(r, k, l)| {
let lsh = LshMem::new(k, l, dim).l2(r as f32)?;
lsh_to_result(lsh, vs, k, l)
})
.collect();
result
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_l2_ph() {
let r = 2.0;
let c = 1.0;
assert_eq!(0.609548422215397, l2_ph(r, c) as f32);
}
#[test]
fn test_estimate_l() {
let delta = 0.2;
let p1 = 0.6;
let k = 5;
assert_eq!(20, estimate_l(delta, p1, k));
}
}