algorithms_edu/algo/ml/clustering/
hierarchical.rs

1// Copied from the `kodama` crate under MIT license
2/// A method for computing the dissimilarities between clusters.
3///
4/// The method selected dictates how the dissimilarities are computed whenever
5/// a new cluster is formed. In particular, when clusters `a` and `b` are
6/// merged into a new cluster `ab`, then the pairwise dissimilarity between
7/// `ab` and every other cluster is computed using one of the methods variants
8/// in this type.
9#[derive(Clone, Copy, Debug, Eq, PartialEq)]
10pub enum Method {
11    /// Assigns the minimum dissimilarity between all pairs of observations.
12    ///
13    /// Specifically, if `AB` is a newly merged cluster and `X` is every other
14    /// cluster, then the pairwise dissimilarity between `AB` and `X` is
15    /// computed by
16    ///
17    /// ```text
18    /// min(d[ab, x] for ab in AB for x in X)
19    /// ```
20    ///
21    /// where `ab` and `x` correspond to all observations in `AB` and `X`,
22    /// respectively.
23    Single,
24    /// Assigns the maximum dissimilarity between all pairs of observations.
25    ///
26    /// Specifically, if `AB` is a newly merged cluster and `X` is every other
27    /// cluster, then the pairwise dissimilarity between `AB` and `X` is
28    /// computed by
29    ///
30    /// ```text
31    /// max(d[ab, x] for ab in AB for x in X)
32    /// ```
33    ///
34    /// where `ab` and `x` correspond to all observations in `AB` and `X`,
35    /// respectively.
36    Complete,
37    /// Assigns the average dissimilarity between all pairs of observations.
38    ///
39    /// Specifically, if `AB` is a newly merged cluster and `X` is every other
40    /// cluster, then the pairwise dissimilarity between `AB` and `X` is
41    /// computed by
42    ///
43    /// ```text
44    /// sum(d[ab, x] for ab in AB for x in X) / (|AB| * |X|)
45    /// ```
46    ///
47    /// where `ab` and `x` correspond to all observations in `AB` and `X`,
48    /// respectively, and `|AB|` and `|X|` correspond to the total number of
49    /// observations in `AB` and `X`, respectively.
50    Average,
51    /// Assigns the weighted dissimilarity between clusters.
52    ///
53    /// Specifically, if `AB` is a newly merged cluster and `X` is every other
54    /// cluster, then the pairwise dissimilarity between `AB` and `X` is
55    /// computed by
56    ///
57    /// ```text
58    /// 0.5 * (d(A, X) + d(B, X))
59    /// ```
60    ///
61    /// where `A` and `B` correspond to the clusters that merged to create
62    /// `AB`.
63    Weighted,
64    /// Assigns the Ward dissimilarity between clusters.
65    ///
66    /// Specifically, if `AB` is a newly merged cluster and `X` is every other
67    /// cluster, then the pairwise dissimilarity between `AB` and `X` is
68    /// computed by
69    ///
70    /// ```text
71    /// let t1 = d(A, X)^2 * (|A| + |X|);
72    /// let t2 = d(B, X)^2 * (|B| + |X|);
73    /// let t3 = d(A, B)^2 * |X|;
74    /// let T = |A| + |B| + |X|;
75    /// sqrt(t1/T + t2/T + t3/T)
76    /// ```
77    ///
78    /// where `A` and `B` correspond to the clusters that merged to create
79    /// `AB`.
80    Ward,
81    /// Assigns the centroid dissimilarity between clusters.
82    ///
83    /// Specifically, if `AB` is a newly merged cluster and `X` is every other
84    /// cluster, then the pairwise dissimilarity between `AB` and `X` is
85    /// computed by
86    ///
87    /// ```text
88    /// let t1 = |A| * d(A, X) + |B| * d(B, X));
89    /// let t2 = |A| * |B| * d(A, B);
90    /// let size = |A| + |B|;
91    /// sqrt(t1/size + t2/size^2)
92    /// ```
93    ///
94    /// where `A` and `B` correspond to the clusters that merged to create
95    /// `AB`.
96    Centroid,
97    /// Assigns the median dissimilarity between clusters.
98    ///
99    /// Specifically, if `AB` is a newly merged cluster and `X` is every other
100    /// cluster, then the pairwise dissimilarity between `AB` and `X` is
101    /// computed by
102    ///
103    /// ```text
104    /// sqrt(d(A, X)/2 + d(B, X)/2 - d(A, B)/4)
105    /// ```
106    ///
107    /// where `A` and `B` correspond to the clusters that merged to create
108    /// `AB`.
109    Median,
110}