use crate::arrayadapter::ArrayAdapter;
use crate::util::*;
use core::ops::AddAssign;
use num_traits::{Signed, Zero};
use std::convert::From;
pub fn fasterpam<M, N, L>(
mat: &M,
med: &mut Vec<usize>,
maxiter: usize,
) -> (L, Vec<usize>, usize, usize)
where
N: Zero + PartialOrd + Copy,
L: AddAssign + Signed + Zero + PartialOrd + Copy + From<N>,
M: ArrayAdapter<N>,
{
let (n, k) = (mat.len(), med.len());
if k == 1 {
let assi = vec![0; n];
let (swapped, loss) = choose_medoid_within_partition::<M, N, L>(mat, &assi, med, 0);
return (loss, assi, 1, if swapped { 1 } else { 0 });
}
let (mut loss, mut data) = initial_assignment(mat, med);
debug_assert_assignment(mat, med, &data);
let mut removal_loss = vec![L::zero(); k];
update_removal_loss(&data, &mut removal_loss);
let (mut lastswap, mut n_swaps, mut iter) = (n, 0, 0);
while iter < maxiter {
iter += 1;
let (swaps_before, lastloss) = (n_swaps, loss);
for j in 0..n {
if j == lastswap {
break;
}
if j == med[data[j].near.i as usize] {
continue; }
let (change, b) = find_best_swap(mat, &removal_loss, &data, j);
if change >= L::zero() {
continue; }
n_swaps += 1;
lastswap = j;
loss = do_swap(mat, med, &mut data, b, j);
update_removal_loss(&data, &mut removal_loss);
}
if n_swaps == swaps_before || loss >= lastloss {
break; }
}
let assi = data.iter().map(|x| x.near.i as usize).collect();
(loss, assi, iter, n_swaps)
}
#[cfg(feature = "rand")]
pub fn rand_fasterpam<M, N, L>(
mat: &M,
med: &mut Vec<usize>,
maxiter: usize,
rng: &mut impl rand::Rng,
) -> (L, Vec<usize>, usize, usize)
where
N: Zero + PartialOrd + Copy,
L: AddAssign + Signed + Zero + PartialOrd + Copy + From<N>,
M: ArrayAdapter<N>,
{
let (n, k) = (mat.len(), med.len());
if k == 1 {
let assi = vec![0; n];
let (swapped, loss) = choose_medoid_within_partition::<M, N, L>(mat, &assi, med, 0);
return (loss, assi, 1, if swapped { 1 } else { 0 });
}
let (mut loss, mut data) = initial_assignment(mat, med);
debug_assert_assignment(mat, med, &data);
let mut removal_loss = vec![L::zero(); k];
update_removal_loss(&data, &mut removal_loss);
let (mut lastswap, mut n_swaps, mut iter) = (n, 0, 0);
let seq = rand::seq::index::sample(rng, n, n); while iter < maxiter {
iter += 1;
let (swaps_before, lastloss) = (n_swaps, loss);
for j in seq.iter() {
if j == lastswap {
break;
}
if j == med[data[j].near.i as usize] {
continue; }
let (change, b) = find_best_swap(mat, &removal_loss, &data, j);
if change >= L::zero() {
continue; }
n_swaps += 1;
lastswap = j;
loss = do_swap(mat, med, &mut data, b, j);
update_removal_loss(&data, &mut removal_loss);
}
if n_swaps == swaps_before || loss >= lastloss {
break; }
}
let assi = data.iter().map(|x| x.near.i as usize).collect();
(loss, assi, iter, n_swaps)
}
#[inline]
pub(crate) fn initial_assignment<M, N, L>(mat: &M, med: &[usize]) -> (L, Vec<Rec<N>>)
where
N: Zero + PartialOrd + Copy,
L: AddAssign + Zero + PartialOrd + Copy + From<N>,
M: ArrayAdapter<N>,
{
let (n, k) = (mat.len(), med.len());
assert!(mat.is_square(), "Dissimilarity matrix is not square");
assert!(n <= u32::MAX as usize, "N is too large");
assert!(k > 0 && k < u32::MAX as usize, "invalid N");
assert!(k <= n, "k must be at most N");
let mut data = vec![Rec::<N>::empty(); mat.len()];
let firstcenter = med[0];
let loss = data
.iter_mut()
.enumerate()
.map(|(i, cur)| {
*cur = Rec::new(0, mat.get(i, firstcenter), u32::MAX, N::zero());
for (m, &me) in med.iter().enumerate().skip(1) {
let d = mat.get(i, me);
if d < cur.near.d || i == me {
cur.seco = cur.near;
cur.near = DistancePair { i: m as u32, d };
} else if cur.seco.i == u32::MAX || d < cur.seco.d {
cur.seco = DistancePair { i: m as u32, d };
}
}
L::from(cur.near.d)
})
.reduce(L::add)
.unwrap();
(loss, data)
}
#[inline]
pub(crate) fn find_best_swap<M, N, L>(
mat: &M,
removal_loss: &[L],
data: &[Rec<N>],
j: usize,
) -> (L, usize)
where
N: Zero + PartialOrd + Copy,
L: AddAssign + Signed + Zero + PartialOrd + Copy + From<N>,
M: ArrayAdapter<N>,
{
let mut ploss = removal_loss.to_vec();
let mut acc = L::zero();
for (o, reco) in data.iter().enumerate() {
let doj = mat.get(o, j);
if doj < reco.near.d {
acc += L::from(doj) - L::from(reco.near.d);
ploss[reco.near.i as usize] += L::from(reco.near.d) - L::from(reco.seco.d);
} else if doj < reco.seco.d {
ploss[reco.near.i as usize] += L::from(doj) - L::from(reco.seco.d);
}
}
let (b, bloss) = find_min(&mut ploss.iter());
(bloss + acc, b) }
pub(crate) fn update_removal_loss<N, L>(data: &[Rec<N>], loss: &mut Vec<L>)
where
N: Zero + Copy,
L: AddAssign + Signed + Copy + Zero + From<N>,
{
loss.fill(L::zero()); for rec in data.iter() {
loss[rec.near.i as usize] += L::from(rec.seco.d) - L::from(rec.near.d);
}
}
#[inline]
pub(crate) fn update_second_nearest<M, N>(
mat: &M,
med: &[usize],
n: usize,
b: usize,
o: usize,
doj: N,
) -> DistancePair<N>
where
N: PartialOrd + Copy,
M: ArrayAdapter<N>,
{
let mut s = DistancePair::new(b as u32, doj);
for (i, &mi) in med.iter().enumerate() {
if i == n || i == b {
continue;
}
let d = mat.get(o, mi);
if d < s.d {
s = DistancePair::new(i as u32, d);
}
}
s
}
#[inline]
pub(crate) fn do_swap<M, N, L>(
mat: &M,
med: &mut Vec<usize>,
data: &mut Vec<Rec<N>>,
b: usize,
j: usize,
) -> L
where
N: Zero + PartialOrd + Copy,
L: AddAssign + Signed + Zero + PartialOrd + Copy + From<N>,
M: ArrayAdapter<N>,
{
let n = mat.len();
assert!(b < med.len(), "invalid medoid number");
assert!(j < n, "invalid object number");
med[b] = j;
data.iter_mut()
.enumerate()
.map(|(o, reco)| {
if o == j {
if reco.near.i != b as u32 {
reco.seco = reco.near;
}
reco.near = DistancePair::new(b as u32, N::zero());
return L::zero();
}
let doj = mat.get(o, j);
if reco.near.i == b as u32 {
if doj < reco.seco.d {
reco.near = DistancePair::new(b as u32, doj);
} else {
reco.near = reco.seco;
reco.seco = update_second_nearest(mat, med, reco.near.i as usize, b, o, doj);
}
} else {
if doj < reco.near.d {
reco.seco = reco.near;
reco.near = DistancePair::new(b as u32, doj);
} else if doj < reco.seco.d {
reco.seco = DistancePair::new(b as u32, doj);
} else if reco.seco.i == b as u32 {
reco.seco = update_second_nearest(mat, med, reco.near.i as usize, b, o, doj);
}
}
L::from(reco.near.d)
})
.reduce(L::add)
.unwrap()
}
#[cfg(test)]
mod tests {
use crate::{arrayadapter::LowerTriangle, fasterpam, silhouette, util::assert_array};
#[test]
fn testfasterpam_simple() {
let data = LowerTriangle {
n: 5,
data: vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
};
let mut meds = vec![0, 1];
let (loss, assi, n_iter, n_swap): (i64, _, _, _) = fasterpam(&data, &mut meds, 10);
let (sil, _): (f64, _) = silhouette(&data, &assi, false);
assert_eq!(loss, 4, "loss not as expected");
assert_eq!(n_swap, 2, "swaps not as expected");
assert_eq!(n_iter, 2, "iterations not as expected");
assert_array(assi, vec![0, 0, 0, 1, 1], "assignment not as expected");
assert_array(meds, vec![0, 3], "medoids not as expected");
assert_eq!(sil, 0.7522494172494172, "Silhouette not as expected");
}
#[test]
fn testfasterpam_single_cluster() {
let data = LowerTriangle {
n: 5,
data: vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
};
let mut meds = vec![1]; let (loss, assi, n_iter, n_swap): (i64, _, _, _) = fasterpam(&data, &mut meds, 10);
let (sil, _): (f64, _) = silhouette(&data, &assi, false);
assert_eq!(loss, 14, "loss not as expected");
assert_eq!(n_swap, 1, "swaps not as expected");
assert_eq!(n_iter, 1, "iterations not as expected");
assert_array(assi, vec![0, 0, 0, 0, 0], "assignment not as expected");
assert_array(meds, vec![0], "medoids not as expected");
assert_eq!(sil, 0., "Silhouette not as expected");
}
#[cfg(feature = "rand")]
use crate::rand_fasterpam;
#[cfg(feature = "rand")]
use rand::{rngs::StdRng, SeedableRng};
#[cfg(feature = "rand")]
#[test]
fn testrand_fasterpam() {
let data = LowerTriangle {
n: 5,
data: vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
};
let mut meds = vec![0, 1];
let mut rng = StdRng::seed_from_u64(1);
let (loss, assi, n_iter, n_swap): (i64, _, _, _) =
rand_fasterpam(&data, &mut meds, 10, &mut rng);
let (sil, _): (f64, _) = silhouette(&data, &assi, false);
assert_eq!(loss, 4, "loss not as expected");
assert_eq!(n_swap, 1, "swaps not as expected");
assert_eq!(n_iter, 2, "iterations not as expected");
assert_array(assi, vec![0, 0, 0, 1, 1], "assignment not as expected");
assert_array(meds, vec![0, 4], "medoids not as expected");
assert_eq!(sil, 0.7522494172494172, "Silhouette not as expected");
}
}