use rayon::prelude::*;
use rand_xoshiro::rand_core::SeedableRng;
use rand_xoshiro::Xoshiro256PlusPlus;
use quantiles::ckms::CKMS;
use rand::distr::{Distribution, Uniform};
use anndists::dist::*;
/// Estimates the scale of pairwise distances in `data` from a random sample of couples.
///
/// Up to `nbsample_arg` couples of indices (capped at `data.len()` squared) are drawn
/// with a fixed-seed generator, so the sample is reproducible from one call to the next.
/// Couples made of twice the same index are discarded, the distance of every remaining
/// couple is evaluated in parallel and inserted into a CKMS quantile sketch built with
/// an error parameter of 0.01. A summary of a few quantiles is printed on stdout.
///
/// # Arguments
/// * `nbsample_arg` - maximal number of couples to draw.
/// * `data` - the points between which distances are sampled.
/// * `distance` - the distance used to compare two points.
///
/// # Returns
/// The quantile sketch of the sampled distances. The sketch is empty, and nothing is
/// printed, when `data` holds fewer than 2 points or when no valid couple was drawn
/// (for example `nbsample_arg == 0`).
// The lint allowance silences the dead-code warning; presumably the function has no
// caller in some configurations of the crate -- TODO confirm.
#[allow(unused)]
pub(crate) fn scale_estimation<T, Dist>(
    nbsample_arg: usize,
    data: &[Vec<T>],
    distance: &Dist,
) -> CKMS<f32>
where
    Dist: Distance<T> + Sync,
    T: Send + Sync,
{
    let mut q_dist = CKMS::<f32>::new(0.01);
    let nbdata = data.len();
    // With fewer than 2 points no couple of distinct indices exists, and an empty
    // sampling range would make `Uniform::new` fail.
    if nbdata < 2 {
        return q_dist;
    }
    let mut rng = Xoshiro256PlusPlus::seed_from_u64(1454691);
    let unif = Uniform::<usize>::new(0, nbdata).expect("0..nbdata is a non-empty range");
    // saturating_mul: the cap must not overflow for very large data sets.
    let nbsample = nbsample_arg.min(nbdata.saturating_mul(nbdata));
    // Couples are drawn sequentially so that the sample only depends on the seed.
    let couples: Vec<(usize, usize)> = (0..nbsample)
        .map(|_| (unif.sample(&mut rng), unif.sample(&mut rng)))
        .filter(|c| c.0 != c.1)
        .collect();
    let dvec: Vec<f32> = couples
        .into_par_iter()
        .map(|(it1, it2)| distance.eval(&data[it1], &data[it2]))
        .collect();
    // Querying an empty sketch yields no value: return it as is instead of panicking.
    if dvec.is_empty() {
        return q_dist;
    }
    for d in dvec {
        q_dist.insert(d);
    }
    // The sketch is non-empty here; NaN is only a defensive fallback for the display.
    let quantile = |q: f64| q_dist.query(q).map_or(f32::NAN, |(_, value)| value);
    println!("\n distance quantiles at 0.0001 : {:.2e} , 0.001 : {:.2e}, 0.01 : {:.2e} , 0.5 : {:.2e}, 0.99 : {:.2e} 0.999 : {:.2e}\n",
        quantile(0.0001), quantile(0.001), quantile(0.01),
        quantile(0.5), quantile(0.99), quantile(0.999));
    q_dist
}
/// Samples, for a few points of `data`, the two smallest distances to randomly drawn
/// other points and summarizes them in CKMS quantile sketches.
///
/// With `nb_sample = max(2, floor(log2(data.len())))`, each of the first `nb_sample`
/// points of `data` is compared to `nb_sample` other points drawn at random (with
/// replacement, the point itself excluded) using a generator seeded from the point
/// index. The smallest and second smallest sampled distances of each explored point
/// feed two sketches (error parameter 0.01) whose quantiles are printed on stdout,
/// smallest distances first.
///
/// # Arguments
/// * `_nbsample_arg` - currently ignored.
/// * `data` - the points to explore.
/// * `distance` - the distance used to compare two points.
///
/// # Returns
/// The sketch of the second smallest sampled distances. It is empty, and nothing is
/// printed, when `data` holds fewer than 2 points.
pub(crate) fn get_neighborhood_size<T, Dist>(
    _nbsample_arg: usize,
    data: &[Vec<T>],
    distance: &Dist,
) -> CKMS<f32>
where
    Dist: Distance<T> + Sync,
    T: Send + Sync,
{
    let nbdata = data.len();
    // Fewer than 2 points: `ilog2(0)` would panic and, with a single point, the
    // search for an index different from the explored one would never end.
    if nbdata < 2 {
        return CKMS::<f32>::new(0.01);
    }
    let unif = Uniform::<usize>::new(0, nbdata).expect("0..nbdata is a non-empty range");
    // At least 2 draws per point are needed to have a second smallest distance.
    // For nbdata >= 4 this is floor(log2(nbdata)), which is always < nbdata.
    let nb_sample: usize = 2.max(nbdata.ilog2() as usize);
    let explore = |i: usize| -> (f32, f32) {
        // One generator per explored point keeps the parallel run reproducible.
        let mut rng = Xoshiro256PlusPlus::seed_from_u64(14547 + i as u64);
        let mut dvec: Vec<f32> = (0..nb_sample)
            .map(|_| {
                // Redraw until a point different from `i` is found.
                loop {
                    let j = unif.sample(&mut rng);
                    if j != i {
                        break distance.eval(&data[i], &data[j]);
                    }
                }
            })
            .collect();
        // total_cmp gives a total order on f32, so a NaN distance cannot cause a panic.
        dvec.sort_unstable_by(f32::total_cmp);
        (dvec[0], dvec[1])
    };
    // NOTE(review): only the first `nb_sample` points of `data` are explored and
    // `_nbsample_arg` is unused -- confirm this is the intended sampling.
    let dist_2: Vec<(f32, f32)> = (0..nb_sample).into_par_iter().map(explore).collect();
    let mut q1_dist: CKMS<f32> = CKMS::<f32>::new(0.01);
    let mut q2_dist: CKMS<f32> = CKMS::<f32>::new(0.01);
    for (first, second) in dist_2 {
        q1_dist.insert(first);
        q2_dist.insert(second);
    }
    // Both sketches hold `nb_sample >= 2` values here; NaN is only a defensive
    // fallback for the display.
    let report = |sketch: &CKMS<f32>| {
        let quantile = |q: f64| sketch.query(q).map_or(f32::NAN, |(_, value)| value);
        println!("\n distance quantiles at 0.001 : {:.2e}, 0.01 : {:.2e} , 0.5 : {:.2e}, 0.99 : {:.2e} 0.999 : {:.2e}\n",
            quantile(0.001), quantile(0.01),
            quantile(0.5), quantile(0.99), quantile(0.999));
    };
    // Smallest sampled distances first, then second smallest ones.
    report(&q1_dist);
    report(&q2_dist);
    q2_dist
}