use parking_lot::RwLock;
use rayon::prelude::*;
use rand_xoshiro::rand_core::SeedableRng;
use rand_xoshiro::Xoshiro256PlusPlus;
use rand::distr::{Distribution, Uniform};
use anndists::dist::*;
use crate::facility::*;
use crate::scale::*;
pub struct MettuPlaxton<'b, T: Send + Sync, Dist: Distance<T>> {
nb_data: usize,
data: &'b [Vec<T>],
j: u32,
distance: Dist,
rng: Xoshiro256PlusPlus,
}
impl<'b, T: Send + Sync + Clone, Dist: Distance<T>> MettuPlaxton<'b, T, Dist> {
pub fn new(data: &'b [Vec<T>], distance: Dist) -> Self {
let nb_data: usize = data.len();
let j: u32 = data.len().ilog2();
MettuPlaxton {
nb_data,
data,
j,
distance,
rng: Xoshiro256PlusPlus::seed_from_u64(123),
}
}
pub fn get_nb_data(&self) -> usize {
self.nb_data
}
fn estimate_ball_cardinal(&self, (ip, point): (usize, &Vec<T>), scale: f32) -> (usize, f32)
where
Dist: Sync,
{
let c = 2.;
let mut j_tmp = self.j;
let mut rng = self.rng.clone();
let mut iter_num = 0;
rng.jump();
let unif = Uniform::<usize>::new(0, self.nb_data).unwrap();
let r: f32 = loop {
let r_test = scale / 2_u32.pow(j_tmp) as f32;
let nb_sample_f: f32 = c * (self.nb_data as f32 * r_test / scale) / self.j as f32;
let nb_sample: u32 = nb_sample_f.trunc() as u32;
log::trace!("estimate_ball_cardinal nb_sample : {:?}", nb_sample);
let mut nb_in = 1; for _ in 0..nb_sample {
let k = unif.sample(&mut rng);
let dist = self.distance.eval(point, &self.data[k]);
if dist <= r_test {
nb_in += 1;
}
}
if nb_in >= 2_usize.pow(j_tmp) {
log::debug!("estimate_ball_cardinal for point {:?} ; nb_iter = {:?}, cardinal : {:?}, radius : {:.3e}", ip, iter_num, nb_in, r_test);
break r_test;
} else {
if j_tmp < 1 {
log::error!("error in estimate_ball_cardinal, j_tmp becomes negative");
std::process::exit(1);
}
j_tmp -= 1;
iter_num += 1;
}
};
(ip, r)
}
pub fn construct_centers(&self, alfa: f32) -> Facilities<usize, T, Dist>
where
Dist: Send + Sync + Clone,
{
let q_dist = get_neighborhood_size(1_000_000, self.data, &self.distance);
let threshold = q_dist.query(0.999).unwrap().1;
log::info!("dist medi : {:.3e}", threshold);
let mut facilities: Facilities<usize, T, Dist> =
Facilities::<usize, T, Dist>::new(self.j as usize, self.distance.clone());
let value_to_match = alfa * threshold;
let mut radii: Vec<(usize, f32)> = (0..self.nb_data)
.into_par_iter()
.map(|i| self.estimate_ball_cardinal((i, &self.data[i]), value_to_match))
.collect();
log::debug!("estimate_ball_cardinal done");
radii.sort_unstable_by(|it1, it2| it1.1.partial_cmp(&it2.1).unwrap());
assert!(radii.first().unwrap().1 <= radii.last().unwrap().1);
for p in radii.iter() {
let matched = facilities.match_point(&self.data[p.0], 2. * p.1, &self.distance);
if !matched {
let facility = Facility::new(p.0, &self.data[p.0]);
facilities.insert(facility);
log::debug!("inserted facility at {:?}, radius : {:.3e}", p.0, p.1);
}
}
facilities
}
pub fn compute_distances(&self, facilities: &Facilities<usize, T, Dist>)
where
Dist: Send + Sync,
{
facilities.cross_distances();
} }
pub struct WeightedMettuPlaxton<'b, T: Send + Sync, Dist: Distance<T>> {
nb_data: usize,
data: &'b Vec<Vec<T>>,
weights: &'b Vec<f32>,
j: u32,
distance: Dist,
_rng: Xoshiro256PlusPlus,
}
impl<'b, T: Send + Sync + Clone, Dist: Distance<T> + Send + Sync + Clone>
WeightedMettuPlaxton<'b, T, Dist>
{
pub fn new(data: &'b Vec<Vec<T>>, weights: &'b Vec<f32>, distance: Dist) -> Self {
let nb_data: usize = data.len();
let j: u32 = data.len().ilog2();
WeightedMettuPlaxton {
nb_data,
data,
weights,
j,
distance,
_rng: Xoshiro256PlusPlus::seed_from_u64(123),
}
}
pub fn get_nb_data(&self) -> usize {
self.nb_data
}
fn compute_all_dists(&self) -> Vec<RwLock<Vec<f32>>> {
log::debug!("in WeightedMettuPlaxton::compute_all_dists");
let mut dists: Vec<RwLock<Vec<f32>>> = Vec::<RwLock<Vec<f32>>>::with_capacity(self.nb_data);
for _ in 0..self.nb_data {
let d: Vec<f32> = (0..self.nb_data).map(|_| -1.).collect();
dists.push(RwLock::new(d));
}
let compute_for_i = |i: usize| {
let mut dist_i: Vec<f32> = (0..self.nb_data).map(|_| -1.).collect();
for j in 0..self.nb_data {
let dist = dists[j].read()[i];
let dist_i_j = if dist < 0. {
self.distance.eval(&self.data[i], &self.data[j])
} else {
dist
};
dist_i[j] = dist_i_j;
} *dists[i].write() = dist_i;
1
};
let _res: Vec<i32> = (0..self.nb_data)
.into_par_iter()
.map(compute_for_i)
.collect();
dists
}
fn compute_ball_radius(&self, alfa: f32, dists: &RwLock<Vec<f32>>) -> f32 {
log::debug!(
"\n\n WeightedMettuPlaxton compute_ball_radius , coeff value to match {:.3e}",
alfa
);
log::debug!(
"WeightedMettuPlaxton compute_ball_radius , radii {:?}",
dists.read()
);
let mut indexed_dist: Vec<(usize, f32)> = (0..self.nb_data)
.zip(dists.read().iter())
.map(|(i, f)| (i, *f))
.collect();
indexed_dist.sort_unstable_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
log::debug!("indexed_dist : {:?}", indexed_dist);
let mut cumulated_values = Vec::<(f32, f32)>::with_capacity(self.get_nb_data());
cumulated_values.push((
self.weights[indexed_dist[0].0],
indexed_dist[0].1 * self.weights[indexed_dist[0].0],
));
for j in 1..self.get_nb_data() {
let weight = cumulated_values[j - 1].0 + self.weights[indexed_dist[j].0];
let indexed_weight =
cumulated_values[j - 1].1 + indexed_dist[j].1 * self.weights[indexed_dist[j].0];
cumulated_values.push((weight, indexed_weight));
}
assert_eq!(cumulated_values.len(), self.get_nb_data());
log::debug!("cumulated_values : {:?}", cumulated_values);
let value_at_j =
|j: usize| -> f32 { indexed_dist[j].1 * cumulated_values[j].0 - cumulated_values[j].1 };
let radius_index = self.nb_data.ilog2().max(1);
let value = alfa * value_at_j(radius_index.try_into().unwrap());
log::debug!("value to match : {:.2e}", value);
if log::log_enabled!(log::Level::Debug) {
let check: Vec<f32> = (0..self.get_nb_data()).map(value_at_j).collect();
log::debug!("check : {:?}", check);
}
let upper_value = value_at_j(self.get_nb_data() - 1);
log::debug!(
"imp::compute_ball_radius upper_value : {:.3e} value : {:.3e}",
upper_value,
value
);
let radius: f32;
if upper_value <= value {
log::info!(
"value is too large upper_value : {:.3e} value : {:.3e}",
upper_value,
value
);
std::panic!("not yet implemented");
} else {
let mut upper_index = self.get_nb_data() - 1;
let mut lower_index = 0;
let mut middle_index = (upper_index + lower_index) / 2;
let mut value_at_upper = upper_value;
let mut value_at_lower = 0.;
while upper_index - lower_index > 1 {
log::trace!(
"lower index: {:?}, upper index : {:?} value lower : {:?}, value upper {:?}",
lower_index,
upper_index,
value_at_lower,
value_at_upper
);
let value_at_middle = value_at_j(middle_index);
if value_at_middle > value {
upper_index = middle_index;
value_at_upper = value_at_middle;
} else {
lower_index = middle_index;
value_at_lower = value_at_middle;
}
middle_index = (upper_index + lower_index) / 2;
} log::debug!(
"lower index : {:?}, upper index : {:?}, value lower : {:?}, value upper : {:?} ",
lower_index,
upper_index,
value_at_lower,
value_at_upper
);
radius = indexed_dist[lower_index].1
+ (value - value_at_lower) / cumulated_values[lower_index].0;
log::debug!("got radius : {:?}", radius);
assert!(radius >= indexed_dist[lower_index].1);
assert!(radius < indexed_dist[upper_index].1);
}
if radius > 0. {
radius
} else {
std::panic!("error in compute_ball_radius, radius : {:?}", radius);
}
}
fn compute_balls_at_value(
&self,
alfa: f32,
dists: &[RwLock<Vec<f32>>],
) -> Facilities<usize, T, Dist> {
log::debug!("in WeightedMettuPlaxton::compute_balls_at_value");
let mut radii: Vec<(usize, f32)> = (0..self.nb_data)
.into_par_iter()
.map(|i| (i, self.compute_ball_radius(alfa, &dists[i])))
.collect();
radii.sort_unstable_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
let mut facilities: Facilities<usize, T, Dist> =
Facilities::<usize, T, Dist>::new(self.j as usize, self.distance.clone());
for p in radii.iter() {
let matched = facilities.match_point(&self.data[p.0], 2. * p.1, &self.distance);
if !matched {
let mut facility = Facility::new(p.0, &self.data[p.0]);
facility.insert(self.weights[p.0] as f64, p.1);
log::info!(
"inserting facility at {:?}, radius : {:.3e}, weight : {:.3e}",
p.0,
p.1,
facility.get_weight()
);
facilities.insert(facility);
}
}
facilities
}
pub fn construct_centers(&self, alfa: f32) -> Facilities<usize, T, Dist> {
log::info!(
"in WeightedMettuPlaxton::construct_centers alfa : {:.3e}",
alfa
);
let dists: Vec<RwLock<Vec<f32>>> = self.compute_all_dists();
let mut facilities = self.compute_balls_at_value(alfa, &dists);
let data_unweighted: Vec<&Vec<T>> = self.data.iter().collect();
let ids = (0..data_unweighted.len()).collect::<Vec<usize>>();
facilities.dispatch_data(&data_unweighted, &ids, None);
facilities
} }
#[cfg(test)]
mod tests {
use super::*;
use rand_distr::Normal;
use rand_xoshiro::Xoshiro256PlusPlus;
fn log_init_test() {
let _ = env_logger::builder().is_test(true).try_init();
}
fn generate_weighted_data(nbdata: usize) -> (Vec<Vec<f32>>, Vec<f32>) {
let dim = 50;
let mut data = Vec::<Vec<f32>>::with_capacity(nbdata);
let mut weights = Vec::<f32>::with_capacity(nbdata);
let mut rng = Xoshiro256PlusPlus::seed_from_u64(1454691);
let n_mean1 = 2.;
let n_sigma1 = 1.;
let normal1 = Normal::new(n_mean1, n_sigma1).unwrap();
let unif1 = Uniform::<f32>::new(0.5, 3.).unwrap();
let normal2 = Normal::new(n_mean1, n_sigma1).unwrap();
let unif2 = Uniform::<f32>::new(0.5, 3.).unwrap();
let half = nbdata / 2;
for _ in 0..half {
let d_tmp: Vec<f32> = (0..dim).map(|_| normal1.sample(&mut rng)).collect();
data.push(d_tmp);
weights.push(unif1.sample(&mut rng));
}
for _ in (half + 1)..nbdata {
let d_tmp: Vec<f32> = (0..dim).map(|_| normal2.sample(&mut rng)).collect();
data.push(d_tmp);
weights.push(unif2.sample(&mut rng));
}
(data, weights)
}
#[test]
fn test_weight_mp() {
log_init_test();
log::info!("in test_weight_mp");
let (data, weights) = generate_weighted_data(12);
let distance = DistL2;
let wmp = WeightedMettuPlaxton::new(&data, &weights, distance);
let alfa = 1.;
let _ = wmp.construct_centers(alfa);
} }