#![allow(clippy::needless_range_loop)]
pub mod arrayadapter;
pub mod safeadd;
pub use crate::arrayadapter::ArrayAdapter;
pub use crate::safeadd::SafeAdd;
use num_traits::{NumAssignOps, Signed, Zero};
#[derive(Debug, Copy, Clone)]
struct DistancePair<N> {
i: u32,
d: N,
}
#[derive(Debug)]
struct Rec<N> {
near: DistancePair<N>,
seco: DistancePair<N>,
}
#[inline]
fn initial_assignment<M, N>(mat: &M, med: &[usize], data: &mut Vec<Rec<N>>) -> N
where
N: NumAssignOps + Zero + PartialOrd + Copy + SafeAdd,
M: ArrayAdapter<N>,
{
let n = mat.len();
let k = med.len();
assert!(mat.is_square(), "Dissimilarity matrix is not square");
assert!(n <= u32::MAX as usize, "N is too large");
assert!(k > 0 && k < u32::MAX as usize, "invalid N");
assert!(k <= n, "k must be at most N");
assert!(data.is_empty(), "data not empty");
let firstcenter = med[0];
let mut loss: N = N::zero();
for i in 0..n {
let mut cur = Rec::<N> {
near: DistancePair {
i: 0,
d: mat.get(i, firstcenter),
},
seco: DistancePair {
i: u32::MAX,
d: N::zero(),
},
};
for m in 1..k {
let dm = mat.get(i, med[m]);
if dm < cur.near.d || i == med[m] {
cur.seco = cur.near;
cur.near = DistancePair { i: m as u32, d: dm };
} else if cur.seco.i == u32::MAX || dm < cur.seco.d {
cur.seco = DistancePair { i: m as u32, d: dm };
}
}
loss.safe_inc(cur.near.d);
data.push(cur);
}
loss
}
fn update_removal_loss<N>(data: &[Rec<N>], loss: &mut Vec<N>)
where
N: NumAssignOps + Signed + Copy + Zero + SafeAdd,
{
let n = data.len();
for i in 0..loss.len() {
loss[i] = N::zero();
}
for i in 0..n {
let rec = &data[i];
loss[rec.near.i as usize].safe_inc(rec.seco.d - rec.near.d);
}
}
#[inline]
fn find_min<N>(a: &[N]) -> (usize, N)
where
N: PartialOrd + Copy + Zero,
{
let mut rk: usize = a.len();
let mut rv: N = N::zero();
for (ik, iv) in a.iter().enumerate() {
if ik == 0 || *iv < rv {
rk = ik;
rv = *iv;
}
}
(rk, rv)
}
#[inline]
fn update_second_nearest<M, N>(
mat: &M,
med: &[usize],
n: usize,
b: usize,
o: usize,
djo: N,
) -> DistancePair<N>
where
N: NumAssignOps + PartialOrd + Copy + SafeAdd,
M: ArrayAdapter<N>,
{
let mut s = DistancePair {
i: b as u32,
d: djo,
};
for i in 0..med.len() {
if i == n || i == b {
continue;
}
let dm = mat.get(o, med[i]);
if dm < s.d {
s = DistancePair { i: i as u32, d: dm };
}
}
s
}
#[inline]
fn find_best_swap<M, N>(mat: &M, removal_loss: &[N], data: &[Rec<N>], j: usize) -> (N, usize)
where
N: NumAssignOps + Signed + Zero + PartialOrd + Copy + SafeAdd,
M: ArrayAdapter<N>,
{
let n = mat.len();
let mut ploss = removal_loss.to_vec();
let mut acc = N::zero();
for o in 0..n {
let reco = &data[o];
let djo = mat.get(j, o);
if djo < reco.near.d {
acc.safe_inc(djo - reco.near.d);
ploss[reco.near.i as usize].safe_inc(reco.near.d - reco.seco.d);
} else if djo < reco.seco.d {
ploss[reco.near.i as usize].safe_inc(djo - reco.seco.d);
}
}
let (b, bloss) = find_min(&ploss);
(bloss + acc, b)
}
#[inline]
fn find_best_swap_pam<M, N>(mat: &M, med: &[usize], data: &[Rec<N>], j: usize) -> (N, usize)
where
N: NumAssignOps + Signed + Zero + PartialOrd + Copy + SafeAdd,
M: ArrayAdapter<N>,
{
let n = mat.len();
let k = med.len();
let recj = &data[j];
let mut best = (N::zero(), usize::MAX);
for m in 0..k {
let mut acc = -recj.near.d;
for o in 0..n {
if o == j {
continue;
}
let reco = &data[o];
let djo = mat.get(j, o);
if reco.near.i as usize == m {
if djo < reco.seco.d {
acc.safe_inc(djo - reco.near.d);
} else {
acc.safe_inc(reco.seco.d - reco.near.d);
}
} else if djo < reco.near.d {
acc.safe_inc(djo - reco.near.d);
}
}
if acc < best.0 {
best = (acc, m);
}
}
best
}
#[cfg(feature = "assertions")]
fn debug_validate_assignment<M, N>(mat: &M, med: &[usize], data: &[Rec<N>])
where
N: NumAssignOps + PartialOrd + Copy + SafeAdd,
M: ArrayAdapter<N>,
{
let n = mat.len();
for o in 0..n {
debug_assert!(
mat.get(o, med[data[o].near.i as usize]) == data[o].near.d,
"primary assignment inconsistent"
);
debug_assert!(
mat.get(o, med[data[o].seco.i as usize]) == data[o].seco.d,
"secondary assignment inconsistent"
);
debug_assert!(
data[o].near.d <= data[o].seco.d,
"nearest is farther than second nearest"
);
}
}
#[inline]
fn do_swap<M, N>(mat: &M, med: &mut Vec<usize>, data: &mut Vec<Rec<N>>, b: usize, j: usize) -> N
where
N: NumAssignOps + Signed + Zero + PartialOrd + Copy + SafeAdd,
M: ArrayAdapter<N>,
{
let n = mat.len();
assert!(b < med.len(), "invalid medoid number");
assert!(j < n, "invalid object number");
med[b] = j;
let mut newloss = N::zero();
for o in 0..n {
let mut reco = &mut data[o];
if o == j {
if reco.near.i != b as u32 {
reco.seco = reco.near;
}
reco.near = DistancePair {
i: b as u32,
d: N::zero(),
};
continue;
}
let djo = mat.get(j, o);
if reco.near.i == b as u32 {
if djo < reco.seco.d {
reco.near = DistancePair {
i: b as u32,
d: djo,
};
} else {
reco.near = reco.seco;
reco.seco = update_second_nearest(mat, &med, reco.near.i as usize, b, o, djo);
}
} else {
if djo < reco.near.d {
reco.seco = reco.near;
reco.near = DistancePair {
i: b as u32,
d: djo,
};
} else if reco.seco.i == b as u32 {
reco.seco = update_second_nearest(mat, &med, reco.near.i as usize, b, o, djo);
} else if djo < reco.seco.d {
reco.seco = DistancePair {
i: b as u32,
d: djo,
};
}
}
newloss.safe_inc(reco.near.d);
}
#[cfg(feature = "assertions")]
debug_validate_assignment(&mat, &med, &data);
newloss
}
#[cfg(feature = "rand")]
#[inline]
pub fn random_initialization(n: usize, k: usize, mut rng: &mut impl rand::Rng) -> Vec<usize> {
rand::seq::index::sample(&mut rng, n, k).into_vec()
}
pub fn fasterpam<M, N>(
mat: &M,
mut med: &mut Vec<usize>,
maxiter: usize,
) -> (N, Vec<usize>, usize, usize)
where
N: NumAssignOps + Signed + Zero + PartialOrd + Copy + SafeAdd + std::fmt::Display,
M: ArrayAdapter<N>,
{
let n = mat.len();
let k = med.len();
let mut data = Vec::<Rec<N>>::with_capacity(n);
let mut loss = initial_assignment(mat, &med, &mut data);
#[cfg(feature = "assertions")]
debug_validate_assignment(&mat, &med, &data);
let mut removal_loss = vec![N::zero(); k];
update_removal_loss(&data, &mut removal_loss);
let mut lastswap = n;
let mut n_swaps = 0;
let mut iter = 0;
while iter < maxiter {
iter += 1;
let swaps_before = n_swaps;
for j in 0..n {
if j == lastswap {
break;
}
if j == data[j].near.i as usize {
continue;
}
let (change, b) = find_best_swap(mat, &removal_loss, &data, j);
if change >= N::zero() {
continue;
}
n_swaps += 1;
lastswap = j;
let newloss = do_swap(mat, &mut med, &mut data, b, j);
if newloss >= loss {
break;
}
loss = newloss;
update_removal_loss(&data, &mut removal_loss);
}
if n_swaps == swaps_before {
break;
}
}
let assi = data.iter().map(|x| x.near.i as usize).collect();
(loss, assi, iter, n_swaps)
}
pub fn fastpam1<M, N>(
mat: &M,
mut med: &mut Vec<usize>,
maxiter: usize,
) -> (N, Vec<usize>, usize, usize)
where
N: NumAssignOps + Signed + Zero + PartialOrd + Copy + SafeAdd + std::fmt::Display,
M: ArrayAdapter<N>,
{
let n = mat.len();
let k = med.len();
let mut data = Vec::<Rec<N>>::with_capacity(n);
let mut loss = initial_assignment(mat, &med, &mut data);
#[cfg(feature = "assertions")]
debug_validate_assignment(&mat, &med, &data);
let mut removal_loss = vec![N::zero(); k];
let mut n_swaps = 0;
let mut iter = 0;
while iter < maxiter {
iter += 1;
let mut best = (N::zero(), usize::MAX, usize::MAX);
update_removal_loss(&data, &mut removal_loss);
for j in 0..n {
if j == data[j].near.i as usize {
continue;
}
let (change, b) = find_best_swap(mat, &removal_loss, &data, j);
if change >= best.0 {
continue;
}
best = (change, b, j);
}
if best.0 < N::zero() {
n_swaps += 1;
let newloss = do_swap(mat, &mut med, &mut data, best.1, best.2);
if newloss >= loss {
break;
}
loss = newloss;
} else {
break;
}
}
let assi = data.iter().map(|x| x.near.i as usize).collect();
(loss, assi, iter, n_swaps)
}
fn pam_optimize<M, N>(
mat: &M,
mut med: &mut Vec<usize>,
mut data: &mut Vec<Rec<N>>,
maxiter: usize,
mut loss: N,
) -> (N, Vec<usize>, usize, usize)
where
N: NumAssignOps + Signed + Zero + PartialOrd + Copy + SafeAdd + std::fmt::Display,
M: ArrayAdapter<N>,
{
let n = mat.len();
let k = med.len();
#[cfg(feature = "assertions")]
debug_validate_assignment(&mat, &med, &data);
let mut n_swaps = 0;
let mut iter = 0;
while iter < maxiter {
iter += 1;
let mut best = (N::zero(), k, usize::MAX);
for j in 0..n {
if j == data[j].near.i as usize {
continue;
}
let (change, b) = find_best_swap_pam(mat, &med, &data, j);
if change >= best.0 {
continue;
}
best = (change, b, j);
}
if best.0 < N::zero() {
n_swaps += 1;
let newloss = do_swap(mat, &mut med, &mut data, best.1, best.2);
if newloss >= loss {
break;
}
loss = newloss;
} else {
break;
}
}
let assi = data.iter().map(|x| x.near.i as usize).collect();
(loss, assi, iter, n_swaps)
}
fn pam_build_initialize<M, N>(mat: &M, meds: &mut Vec<usize>, data: &mut Vec<Rec<N>>, k: usize) -> N
where
N: NumAssignOps + Signed + Zero + PartialOrd + Copy + SafeAdd + std::fmt::Display,
M: ArrayAdapter<N>,
{
let n = mat.len();
let mut best = (N::zero(), k);
for i in 0..n {
let mut sum = N::zero();
for j in 0..n {
if j != i {
sum += mat.get(i, j);
}
}
if i == 0 || sum < best.0 {
best = (sum, i);
}
}
let mut loss = best.0;
meds.push(best.1);
for j in 0..n {
data.push(Rec::<N> {
near: DistancePair {
i: 0,
d: mat.get(best.1, j),
},
seco: DistancePair {
i: u32::MAX,
d: N::zero(),
},
});
}
for l in 1..k {
best = (N::zero(), k);
for i in 1..n {
let mut sum = N::zero();
for j in 0..n {
if j != i {
let d = mat.get(i, j);
if d < data[j].near.d {
sum += d - data[j].near.d;
}
}
}
if i == 0 || sum < best.0 {
best = (sum, i);
}
}
assert!(best.0 <= N::zero());
loss = N::zero();
for j in 0..n {
let mut recj = &mut data[j];
if j == best.1 {
recj.seco = recj.near;
recj.near = DistancePair {
i: l as u32,
d: N::zero(),
};
continue;
}
let dj = mat.get(best.1, j);
if dj < recj.near.d {
recj.seco = recj.near;
recj.near = DistancePair { i: l as u32, d: dj };
} else if recj.seco.i == u32::MAX || dj < recj.seco.d {
recj.seco = DistancePair { i: l as u32, d: dj };
}
loss.safe_inc(recj.near.d);
}
meds.push(best.1);
}
loss
}
pub fn pam_swap<M, N>(
mat: &M,
mut med: &mut Vec<usize>,
maxiter: usize,
) -> (N, Vec<usize>, usize, usize)
where
N: NumAssignOps + Signed + Zero + PartialOrd + Copy + SafeAdd + std::fmt::Display,
M: ArrayAdapter<N>,
{
let mut data = Vec::<Rec<N>>::with_capacity(mat.len());
let loss = initial_assignment(mat, &med, &mut data);
pam_optimize(mat, &mut med, &mut data, maxiter, loss)
}
pub fn pam_build<M, N>(mat: &M, k: usize) -> (N, Vec<usize>, Vec<usize>)
where
N: NumAssignOps + Signed + Zero + PartialOrd + Copy + SafeAdd + std::fmt::Display,
M: ArrayAdapter<N>,
{
let n = mat.len();
assert!(mat.is_square(), "Dissimilarity matrix is not square");
assert!(n <= u32::MAX as usize, "N is too large");
assert!(k > 0 && k < u32::MAX as usize, "invalid N");
assert!(k <= n, "k must be at most N");
let mut meds = Vec::<usize>::with_capacity(k);
let mut data = Vec::<Rec<N>>::with_capacity(n);
let loss = pam_build_initialize(mat, &mut meds, &mut data, k);
let assi = data.iter().map(|x| x.near.i as usize).collect();
(loss, assi, meds)
}
pub fn pam<M, N>(mat: &M, k: usize, maxiter: usize) -> (N, Vec<usize>, Vec<usize>, usize, usize)
where
N: NumAssignOps + Signed + Zero + PartialOrd + Copy + SafeAdd + std::fmt::Display,
M: ArrayAdapter<N>,
{
let n = mat.len();
assert!(mat.is_square(), "Dissimilarity matrix is not square");
assert!(n <= u32::MAX as usize, "N is too large");
assert!(k > 0 && k < u32::MAX as usize, "invalid N");
assert!(k <= n, "k must be at most N");
let mut meds = Vec::<usize>::with_capacity(k);
let mut data = Vec::<Rec<N>>::with_capacity(n);
let loss = pam_build_initialize(mat, &mut meds, &mut data, k);
let (nloss, assi, n_iter, n_swap) = pam_optimize(mat, &mut meds, &mut data, maxiter, loss);
(nloss, assi, meds, n_iter, n_swap)
}
#[inline]
fn assign_nearest<M, N>(mat: &M, med: &[usize], data: &mut Vec<usize>) -> N
where
N: NumAssignOps + Zero + PartialOrd + Copy + SafeAdd,
M: ArrayAdapter<N>,
{
let n = mat.len();
let k = med.len();
assert!(mat.is_square(), "Dissimilarity matrix is not square");
assert!(n <= u32::MAX as usize, "N is too large");
assert!(k > 0 && k < u32::MAX as usize, "invalid N");
assert!(k <= n, "k must be at most N");
let firstcenter = med[0];
let mut loss: N = N::zero();
for i in 0..n {
let mut cur = 0;
let mut best = mat.get(i, firstcenter);
for m in 1..k {
let dm = mat.get(i, med[m]);
if dm < best || i == med[m] {
cur = m;
best = dm;
}
}
loss.safe_inc(best);
assert!(data.len() >= i);
if data.len() == i {
data.push(cur);
} else {
data[i] = cur;
}
}
loss
}
pub fn choose_medoid_within_partition<M, N>(
mat: &M,
assi: &[usize],
med: &mut [usize],
m: usize,
) -> bool
where
N: NumAssignOps + Signed + Zero + PartialOrd + Copy + SafeAdd + std::fmt::Display,
M: ArrayAdapter<N>,
{
let first = med[m];
let mut best = first;
let mut sumb = N::zero();
for (i, a) in assi.iter().enumerate() {
if first != i && *a == m {
sumb.safe_inc(mat.get(first, i));
}
}
for (j, aj) in assi.iter().enumerate() {
if j != first && *aj == m {
let mut sumj = N::zero();
for (i, ai) in assi.iter().enumerate() {
if i != j && *ai == m {
sumj.safe_inc(mat.get(j, i));
}
}
if sumj < sumb {
best = j;
sumb = sumj;
}
}
}
med[m] = best;
best != first
}
pub fn alternating<M, N>(
mat: &M,
mut med: &mut Vec<usize>,
maxiter: usize,
) -> (N, Vec<usize>, usize)
where
N: NumAssignOps + Signed + Zero + PartialOrd + Copy + SafeAdd + std::fmt::Display,
M: ArrayAdapter<N>,
{
let n = mat.len();
let k = med.len();
let mut assi = Vec::<usize>::with_capacity(n);
let mut loss = assign_nearest(mat, &med, &mut assi);
let mut iter = 0;
while iter < maxiter {
iter += 1;
let mut changed = false;
for i in 0..k {
changed |= choose_medoid_within_partition(mat, &assi, &mut med, i);
}
if !changed {
break;
}
loss = assign_nearest(mat, &med, &mut assi);
}
(loss, assi, iter)
}
#[cfg(test)]
mod tests {
use crate::{
alternating, arrayadapter::LowerTriangle, fasterpam, fastpam1, pam, pam_build, pam_swap,
};
fn assert_array(result: Vec<usize>, expect: Vec<usize>, msg: &'static str) {
assert!(result.iter().zip(expect.iter()).all(|(a, b)| a == b), msg);
}
#[test]
fn testfasterpam_simple() {
let data = LowerTriangle {
n: 5,
data: vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
};
let mut meds = vec![0, 1];
let (loss, assi, n_iter, n_swap) = fasterpam(&data, &mut meds, 10);
assert_eq!(loss, 4, "loss not as expected");
assert_eq!(n_swap, 2, "swaps not as expected");
assert_eq!(n_iter, 2, "iterations not as expected");
assert_array(assi, vec![0, 0, 0, 1, 1], "assignment not as expected");
assert_array(meds, vec![0, 3], "medoids not as expected");
}
#[test]
fn testfastpam_simple() {
let data = LowerTriangle {
n: 5,
data: vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
};
let mut meds = vec![0, 1];
let (loss, assi, n_iter, n_swap) = fastpam1(&data, &mut meds, 10);
assert_eq!(loss, 4, "loss not as expected");
assert_eq!(n_swap, 1, "swaps not as expected");
assert_eq!(n_iter, 2, "iterations not as expected");
assert_array(assi, vec![0, 0, 0, 1, 1], "assignment not as expected");
assert_array(meds, vec![0, 3], "medoids not as expected");
}
#[test]
fn testpam_swap_simple() {
let data = LowerTriangle {
n: 5,
data: vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
};
let mut meds = vec![0, 1];
let (loss, assi, n_iter, n_swap) = pam_swap(&data, &mut meds, 10);
assert_eq!(loss, 4, "loss not as expected");
assert_eq!(n_swap, 1, "swaps not as expected");
assert_eq!(n_iter, 2, "iterations not as expected");
assert_array(assi, vec![0, 0, 0, 1, 1], "assignment not as expected");
assert_array(meds, vec![0, 3], "medoids not as expected");
}
#[test]
fn testpam_build_simple() {
let data = LowerTriangle {
n: 5,
data: vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
};
let (loss, assi, meds) = pam_build(&data, 2);
assert_eq!(loss, 4, "loss not as expected");
assert_array(assi, vec![0, 0, 0, 1, 1], "assignment not as expected");
assert_array(meds, vec![0, 3], "medoids not as expected");
}
#[test]
fn testpam_simple() {
let data = LowerTriangle {
n: 5,
data: vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
};
let (loss, assi, meds, n_iter, n_swap) = pam(&data, 2, 10);
assert_eq!(n_swap, 0, "swaps not as expected");
assert_eq!(n_iter, 1, "iterations not as expected");
assert_eq!(loss, 4, "loss not as expected");
assert_array(assi, vec![0, 0, 0, 1, 1], "assignment not as expected");
assert_array(meds, vec![0, 3], "medoids not as expected");
}
#[test]
fn testalternating() {
let data = LowerTriangle {
n: 5,
data: vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
};
let mut meds = vec![0, 1];
let (loss, assi, n_iter) = alternating(&data, &mut meds, 10);
assert_eq!(n_iter, 3, "iterations not as expected");
assert_eq!(loss, 4, "loss not as expected");
assert_array(assi, vec![1, 1, 1, 0, 0], "assignment not as expected");
assert_array(meds, vec![3, 0], "medoids not as expected");
}
}