1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
use crate::arrayadapter::ArrayAdapter;
use crate::silhouette::checked_div;
use core::ops::{AddAssign, Div, Sub};
use num_traits::{Signed, Zero};
use rayon::prelude::*;
use std::convert::From;

/// Compute the Silhouette of a strict partitional clustering (parallel implementation).
///
/// The Silhouette, proposed by Peter Rousseeuw in 1987, is a popular internal
/// evaluation measure for clusterings. Although it is defined on arbitary metrics,
/// it is most appropriate for evaluating "spherical" clusters, as it expects objects
/// to be closer to all members of its own cluster than to members of other clusters.
///
/// Because of the additional requirement of a division operator, this implementation
/// currently always returns a float result, and accepts only input distances that can be
/// converted into floats.
///
/// * type `M` - matrix data type such as `ndarray::Array2` or `kmedoids::arrayadapter::LowerTriangle`
/// * type `N` - number data type such as `u32` or `f64`
/// * type `L` - number data type such as `f64` for the cost (use a float type)
/// * `mat` - a pairwise distance matrix
/// * `assi` - the cluster assignment
/// * `samples` - whether to keep the individual samples, or not
///
/// returns a tuple containing:
/// * the average silhouette
/// * the individual silhouette values (empty if `samples = false`)
///
/// ## Panics
///
/// * panics when the dissimilarity matrix is not square
///
/// ## Example
/// Given a dissimilarity matrix of size 4 x 4, use:
/// ```
/// let data = ndarray::arr2(&[[0,1,2,3],[1,0,4,5],[2,4,0,6],[3,5,6,0]]);
/// let mut meds = kmedoids::random_initialization(4, 2, &mut rand::thread_rng());
/// let (loss, assi, n_iter): (f64, _, _) = kmedoids::alternating(&data, &mut meds, 100);
/// let sil: f64 = kmedoids::par_silhouette(&data, &assi);
/// println!("Silhouette is: {}", sil);
/// ```
#[cfg(feature = "parallel")]
pub fn par_silhouette<M, N, L>(mat: &M, assi: &[usize]) -> L
where
	N: Zero + PartialOrd + Copy + Sync + Send,
	L: AddAssign
		+ Div<Output = L>
		+ Sub<Output = L>
		+ Signed
		+ Zero
		+ PartialOrd
		+ Copy
		+ From<N>
		+ From<u32>
		+ Sync
		+ Send,
	M: ArrayAdapter<N> + Sync + Send,
{
	let mut lsum = L::zero();
	assert!(mat.is_square(), "Dissimilarity matrix is not square");
	assi.into_par_iter()
		.enumerate()
		.map(|(i, &ai)| {
			let mut buf = Vec::<(u32, L)>::new();
			buf.clear();
			for (j, &aj) in assi.iter().enumerate() {
				while aj >= buf.len() {
					buf.push((0, L::zero()));
				}
				if i != j {
					buf[aj].0 += 1;
					buf[aj].1 += mat.get(i, j).into();
				}
			}
			if buf[ai].0 > 0 {
				let a = checked_div(buf[ai].1, buf[ai].0.into());
				let mut tmp = buf
					.iter()
					.enumerate()
					.filter(|&(i, _)| i != ai)
					.map(|(_, p)| checked_div(p.1, p.0.into()));
				// Ugly hack to get the min():
				let tmp2 = tmp.next().unwrap_or_else(L::zero);
				let b = tmp.fold(tmp2, |x, y| if y < x { y } else { x });
				checked_div(b - a, if a > b { a } else { b }) // return value
			} else {
				L::zero() // singleton
			}
		})
		.collect::<Vec<L>>()
		.iter()
		.for_each(|x| lsum += *x);
	lsum.div((assi.len() as u32).into())
}