use crate::arrayadapter::ArrayAdapter;
use crate::util::*;
use core::ops::AddAssign;
use num_traits::{Signed, Zero, Float};
use std::convert::From;
#[inline]
fn _loss<N, L>(a: N, b: N) -> L
where
N: Zero + Copy,
L: Float + From<N>,
{
if N::is_zero(&a) || N::is_zero(&b) { L::zero() } else { <L as From<N>>::from(a) / <L as From<N>>::from(b) }
}
pub fn fastermsc<M, N, L>(
mat: &M,
med: &mut Vec<usize>,
maxiter: usize,
) -> (L, Vec<usize>, usize, usize)
where
N: Zero + PartialOrd + Copy,
L: Float + Signed + AddAssign + From<N> + From<u32>,
M: ArrayAdapter<N>,
{
let (n, k) = (mat.len(), med.len());
if k == 1 {
let assi = vec![0; n];
let (swapped, loss) = choose_medoid_within_partition::<M, N, L>(mat, &assi, med, 0);
return (loss, assi, 1, if swapped { 1 } else { 0 });
}
if k == 2 { return fastermsc_k2(mat, med, maxiter);
}
let (mut loss, mut data): (L, _) = initial_assignment(mat, med);
debug_assert_assignment_th(mat, med, &data);
let mut removal_loss = vec![L::zero(); k];
update_removal_loss(&data, &mut removal_loss);
let (mut lastswap, mut n_swaps, mut iter) = (n, 0, 0);
while iter < maxiter {
iter += 1;
let (swaps_before, lastloss) = (n_swaps, loss);
for j in 0..n {
if j == lastswap {
break;
}
if j == med[data[j].near.i as usize] {
continue; }
let (change, b) = find_best_swap(mat, &removal_loss, &data, j);
if change <= L::zero() {
continue; }
n_swaps += 1;
lastswap = j;
loss = do_swap(mat, med, &mut data, b, j);
update_removal_loss(&data, &mut removal_loss);
}
if n_swaps == swaps_before || loss >= lastloss {
break; }
}
let assi = data.iter().map(|x| x.near.i as usize).collect();
loss = L::one() - loss / <L as From<u32>>::from(n as u32);
(loss, assi, iter, n_swaps)
}
#[inline]
pub(crate) fn initial_assignment<M, N, L>(mat: &M, med: &[usize]) -> (L, Vec<Reco<N>>)
where
N: Zero + PartialOrd + Copy,
L: Float + AddAssign + From<N>,
M: ArrayAdapter<N>,
{
let (n, k) = (mat.len(), med.len());
assert!(mat.is_square(), "Dissimilarity matrix is not square");
assert!(n <= u32::MAX as usize, "N is too large");
assert!(k > 0 && k < u32::MAX as usize, "invalid N");
assert!(k <= n, "k must be at most N");
let mut data = vec![Reco::<N>::empty(); mat.len()];
let firstcenter = med[0];
let loss = data
.iter_mut()
.enumerate()
.map(|(i, cur)| {
*cur = Reco::new(0, mat.get(i, firstcenter), u32::MAX, N::zero(), u32::MAX, N::zero());
for (m, &me) in med.iter().enumerate().skip(1) {
let d = mat.get(i, me);
if d < cur.near.d || i == me {
cur.third = cur.seco;
cur.seco = cur.near;
cur.near = DistancePair { i: m as u32, d };
} else if cur.seco.i == u32::MAX || d < cur.seco.d {
cur.third = cur.seco;
cur.seco = DistancePair { i: m as u32, d };
} else if cur.third.i == u32::MAX || d < cur.third.d {
cur.third = DistancePair { i: m as u32, d };
}
}
_loss::<N, L>(cur.near.d, cur.seco.d)
})
.reduce(L::add)
.unwrap();
(loss, data)
}
#[inline]
pub(crate) fn find_best_swap<M, N, L>(
mat: &M,
removal_loss: &[L],
data: &[Reco<N>],
j: usize,
) -> (L, usize)
where
N: Zero + PartialOrd + Copy,
L: Float + AddAssign + From<N>,
M: ArrayAdapter<N>,
{
let mut ploss = removal_loss.to_vec();
let mut acc = L::zero();
for (o, reco) in data.iter().enumerate() {
let doj = mat.get(o, j);
if doj < reco.near.d {
acc += _loss::<N, L>(reco.near.d, reco.seco.d) - _loss::<N, L>(doj, reco.near.d);
ploss[reco.near.i as usize] += _loss::<N, L>(doj, reco.near.d) + _loss::<N, L>(reco.seco.d, reco.third.d) - _loss::<N, L>(reco.near.d + doj, reco.seco.d);
ploss[reco.seco.i as usize] += _loss::<N, L>(reco.near.d, reco.third.d) - _loss::<N, L>(reco.near.d, reco.seco.d);
} else if doj < reco.seco.d {
acc += _loss::<N, L>(reco.near.d, reco.seco.d) - _loss::<N, L>(reco.near.d, doj);
ploss[reco.near.i as usize] += _loss::<N, L>(reco.near.d, doj) + _loss::<N, L>(reco.seco.d, reco.third.d) - _loss::<N, L>(reco.near.d + doj, reco.seco.d);
ploss[reco.seco.i as usize] += _loss::<N, L>(reco.near.d, reco.third.d) - _loss::<N, L>(reco.near.d, reco.seco.d);
} else if doj < reco.third.d {
ploss[reco.near.i as usize] += _loss::<N, L>(reco.seco.d, reco.third.d) - _loss::<N, L>(reco.seco.d, doj);
ploss[reco.seco.i as usize] += _loss::<N, L>(reco.near.d, reco.third.d) - _loss::<N, L>(reco.near.d, doj);
}
}
let (b, bloss) = find_max(&mut ploss.iter());
(bloss + acc, b) }
pub(crate) fn update_removal_loss<N, L>(data: &[Reco<N>], loss: &mut Vec<L>)
where
N: Zero + Copy,
L: Float + Signed + AddAssign + From<N>,
{
loss.fill(L::zero()); for rec in data.iter() {
loss[rec.near.i as usize] += _loss::<N, L>(rec.near.d, rec.seco.d) - _loss::<N, L>(rec.seco.d, rec.third.d);
loss[rec.seco.i as usize] += _loss::<N, L>(rec.near.d, rec.seco.d) - _loss::<N, L>(rec.near.d, rec.third.d);
}
}
#[inline]
pub(crate) fn update_third_nearest<M, N>(
mat: &M,
med: &[usize],
n: usize,
s: usize,
b: usize,
o: usize,
doj: N,
) -> DistancePair<N>
where
N: PartialOrd + Copy,
M: ArrayAdapter<N>,
{
let mut dist = DistancePair::new(b as u32, doj);
for (i, &mi) in med.iter().enumerate() {
if i == n || i == b || i == s {
continue;
}
let d = mat.get(o, mi);
if d < dist.d {
dist = DistancePair::new(i as u32, d);
}
}
dist
}
#[inline]
pub(crate) fn do_swap<M, N, L>(
mat: &M,
med: &mut Vec<usize>,
data: &mut Vec<Reco<N>>,
b: usize,
j: usize,
) -> L
where
N: Zero + PartialOrd + Copy,
L: Float + Signed + AddAssign + From<N>,
M: ArrayAdapter<N>,
{
let n = mat.len();
assert!(b < med.len(), "invalid medoid number");
assert!(j < n, "invalid object number");
med[b] = j;
data.iter_mut()
.enumerate()
.map(|(o, reco)| {
if o == j {
if reco.near.i != b as u32 {
if reco.seco.i != b as u32 {
reco.third = reco.seco;
}
reco.seco = reco.near;
}
reco.near = DistancePair::new(b as u32, N::zero());
return L::zero();
}
let doj = mat.get(o, j);
if reco.near.i == b as u32 {
if doj < reco.seco.d {
reco.near = DistancePair::new(b as u32, doj);
} else if reco.third.i == u32::MAX || doj < reco.third.d {
reco.near = reco.seco;
reco.seco = DistancePair::new(b as u32, doj);
} else {
reco.near = reco.seco;
reco.seco = reco.third;
reco.third = update_third_nearest(mat, med, reco.near.i as usize, reco.seco.i as usize, b, o, doj);
}
} else if reco.seco.i == b as u32 {
if doj < reco.near.d {
reco.seco = reco.near;
reco.near = DistancePair::new(b as u32, doj);
} else if reco.third.i == u32::MAX || doj < reco.third.d {
reco.seco = DistancePair::new(b as u32, doj);
} else {
reco.seco = reco.third;
reco.third = update_third_nearest(mat, med, reco.near.i as usize, reco.seco.i as usize, b, o, doj);
}
} else {
if doj < reco.near.d {
reco.third = reco.seco;
reco.seco = reco.near;
reco.near = DistancePair::new(b as u32, doj);
} else if doj < reco.seco.d {
reco.third = reco.seco;
reco.seco = DistancePair::new(b as u32, doj);
} else if reco.third.i == u32::MAX || doj < reco.third.d {
reco.third = DistancePair::new(b as u32, doj);
} else if reco.third.i == b as u32 {
reco.third = update_third_nearest(mat, med, reco.near.i as usize, reco.seco.i as usize, b, o, doj);
}
}
_loss::<N, L>(reco.near.d, reco.seco.d)
})
.reduce(L::add)
.unwrap()
}
pub(crate) fn fastermsc_k2<M, N, L>(
mat: &M,
med: &mut Vec<usize>,
maxiter: usize,
) -> (L, Vec<usize>, usize, usize)
where
N: Zero + PartialOrd + Copy,
L: Float + Signed + AddAssign + From<N> + From<u32>,
M: ArrayAdapter<N>,
{
let (n, k) = (mat.len(), med.len());
assert!(k == 2, "Only valid for k=2");
let (mut loss, mut assi, mut data): (L,_,_) = initial_assignment_k2(mat, med);
let (mut lastswap, mut n_swaps, mut iter) = (n, 0, 0);
while iter < maxiter {
iter += 1;
let (swaps_before, lastloss) = (n_swaps, loss);
for j in 0..n {
if j == lastswap {
break;
}
if j == med[assi[j] as usize] {
continue; }
let (newloss, b): (L, _) = find_best_swap_k2(mat, &data, j); if newloss >= loss {
continue; }
n_swaps += 1;
lastswap = j;
loss = do_swap_k2(mat, med, &mut assi, &mut data, b, j);
}
if n_swaps == swaps_before || loss >= lastloss {
break; }
}
loss = L::one() - loss / <L as From<u32>>::from(n as u32);
(loss, assi, iter, n_swaps)
}
#[inline]
pub(crate) fn initial_assignment_k2<M, N, L>(mat: &M, med: &[usize]) -> (L, Vec<usize>, Vec<(N,N)>)
where
N: Zero + PartialOrd + Copy,
L: Float + AddAssign + From<N>,
M: ArrayAdapter<N>,
{
let (n, k) = (mat.len(), med.len());
assert!(mat.is_square(), "Dissimilarity matrix is not square");
assert!(n <= u32::MAX as usize, "N is too large");
assert!(k == 2, "k must be 2");
let mut assi = vec![0 as usize; mat.len()];
let mut data = vec![(N::zero(), N::zero()); mat.len()];
let loss = assi.iter_mut().zip(data.iter_mut())
.enumerate()
.map(|(i, (a, d))| {
*d = (mat.get(i, med[0]), mat.get(i, med[1]));
if d.0 < d.1 {
*a = 0;
return _loss::<N, L>(d.0, d.1);
} else {
*a = 1;
return _loss::<N, L>(d.1, d.0);
}
})
.reduce(L::add)
.unwrap();
(loss, assi, data)
}
#[inline]
pub(crate) fn find_best_swap_k2<M, N, L>(
mat: &M,
data: &[(N, N)],
j: usize,
) -> (L, usize)
where
N: Zero + PartialOrd + Copy,
L: Float + AddAssign + From<N>,
M: ArrayAdapter<N>,
{
let mut ploss = vec![L::zero(); 2];
for (o, d) in data.iter().enumerate() {
let doj = mat.get(o, j);
ploss[0] += if doj < d.1 { _loss(doj, d.1) } else { _loss(d.1, doj) };
ploss[1] += if doj < d.0 { _loss(doj, d.0) } else { _loss(d.0, doj) };
}
let (b, bloss) = find_min(&mut ploss.iter());
(bloss, b)
}
#[inline]
pub(crate) fn do_swap_k2<M, N, L>(
mat: &M,
med: &mut Vec<usize>,
assi: &mut Vec<usize>,
data: &mut Vec<(N, N)>,
b: usize,
j: usize,
) -> L
where
N: Zero + PartialOrd + Copy,
L: Float + Signed + AddAssign + From<N>,
M: ArrayAdapter<N>,
{
let n = mat.len();
assert!(b < med.len(), "invalid medoid number");
assert!(j < n, "invalid object number");
med[b] = j;
if b == 0 {
return assi.iter_mut().zip(data.iter_mut())
.enumerate()
.map(|(o, (a, d))| {
if o == j {
*a = 0;
d.0 = N::zero();
return L::zero();
}
let doj = mat.get(o, j);
d.0 = doj;
if doj < d.1 || (doj == d.1 && *a == 0) {
*a = 0;
return _loss::<N, L>(doj, d.1);
} else {
*a = 1;
return _loss::<N, L>(d.1, doj);
}
})
.reduce(L::add)
.unwrap();
} else { return assi.iter_mut().zip(data.iter_mut())
.enumerate()
.map(|(o, (a, d))| {
if o == j {
*a = 1;
d.1 = N::zero();
return L::zero();
}
let doj = mat.get(o, j);
d.1 = doj;
if doj < d.0 || (doj == d.0 && *a == 1) {
*a = 1;
return _loss::<N, L>(doj, d.0);
} else {
*a = 0;
return _loss::<N, L>(d.0, doj);
}
})
.reduce(L::add)
.unwrap();
}
}
#[cfg(test)]
mod tests {
use crate::{arrayadapter::LowerTriangle, fastermsc, silhouette, medoid_silhouette, util::assert_array};
#[test]
fn testfastermsc_simple() {
let data = LowerTriangle {
n: 5,
data: vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
};
let mut meds = vec![0, 1, 2];
let (loss, assi, n_iter, n_swap): (f64, _, _, _) = fastermsc(&data, &mut meds, 10);
let (sil, _): (f64, _) = silhouette(&data, &assi, false);
let (msil, _): (f64, _) = medoid_silhouette(&data, &meds, false);
print!("FasterMSC: {:?} {:?} {:?} {:?} {:?} {:?}", loss, n_iter, n_swap, sil, assi, meds);
assert_eq!(loss, 0.9047619047619048, "loss not as expected");
assert_eq!(msil, 0.9047619047619048, "Medoid Silhouette not as expected");
assert_eq!(n_swap, 1, "swaps not as expected");
assert_eq!(n_iter, 2, "iterations not as expected");
assert_array(assi, vec![0, 0, 2, 1, 1], "assignment not as expected");
assert_array(meds, vec![0, 3, 2], "medoids not as expected");
assert_eq!(sil, 0.5622222222222222, "Silhouette not as expected");
}
#[test]
fn testfastermsc_simple2() {
let data = LowerTriangle {
n: 5,
data: vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
};
let mut meds = vec![0, 1];
let (loss, assi, n_iter, n_swap): (f64, _, _, _) = fastermsc(&data, &mut meds, 10);
let (sil, _): (f64, _) = silhouette(&data, &assi, false);
let (msil, _): (f64, _) = medoid_silhouette(&data, &meds, false);
print!("FasterMSC: {:?} {:?} {:?} {:?} {:?} {:?}", loss, n_iter, n_swap, sil, assi, meds);
assert_eq!(loss, 0.8805555555555555, "loss not as expected");
assert_eq!(msil, 0.8805555555555555, "Medoid Silhouette not as expected");
assert_eq!(n_swap, 3, "swaps not as expected");
assert_eq!(n_iter, 2, "iterations not as expected");
assert_array(assi, vec![0, 0, 0, 1, 1], "assignment not as expected");
assert_array(meds, vec![0, 4], "medoids not as expected");
assert_eq!(sil, 0.7522494172494172, "Silhouette not as expected");
}
}