algorithms_edu/algo/ml/clustering/hierarchical.rs
1// Copied from the `kodama` crate under MIT license
2/// A method for computing the dissimilarities between clusters.
3///
4/// The method selected dictates how the dissimilarities are computed whenever
5/// a new cluster is formed. In particular, when clusters `a` and `b` are
6/// merged into a new cluster `ab`, then the pairwise dissimilarity between
7/// `ab` and every other cluster is computed using one of the methods variants
8/// in this type.
9#[derive(Clone, Copy, Debug, Eq, PartialEq)]
10pub enum Method {
11 /// Assigns the minimum dissimilarity between all pairs of observations.
12 ///
13 /// Specifically, if `AB` is a newly merged cluster and `X` is every other
14 /// cluster, then the pairwise dissimilarity between `AB` and `X` is
15 /// computed by
16 ///
17 /// ```text
18 /// min(d[ab, x] for ab in AB for x in X)
19 /// ```
20 ///
21 /// where `ab` and `x` correspond to all observations in `AB` and `X`,
22 /// respectively.
23 Single,
24 /// Assigns the maximum dissimilarity between all pairs of observations.
25 ///
26 /// Specifically, if `AB` is a newly merged cluster and `X` is every other
27 /// cluster, then the pairwise dissimilarity between `AB` and `X` is
28 /// computed by
29 ///
30 /// ```text
31 /// max(d[ab, x] for ab in AB for x in X)
32 /// ```
33 ///
34 /// where `ab` and `x` correspond to all observations in `AB` and `X`,
35 /// respectively.
36 Complete,
37 /// Assigns the average dissimilarity between all pairs of observations.
38 ///
39 /// Specifically, if `AB` is a newly merged cluster and `X` is every other
40 /// cluster, then the pairwise dissimilarity between `AB` and `X` is
41 /// computed by
42 ///
43 /// ```text
44 /// sum(d[ab, x] for ab in AB for x in X) / (|AB| * |X|)
45 /// ```
46 ///
47 /// where `ab` and `x` correspond to all observations in `AB` and `X`,
48 /// respectively, and `|AB|` and `|X|` correspond to the total number of
49 /// observations in `AB` and `X`, respectively.
50 Average,
51 /// Assigns the weighted dissimilarity between clusters.
52 ///
53 /// Specifically, if `AB` is a newly merged cluster and `X` is every other
54 /// cluster, then the pairwise dissimilarity between `AB` and `X` is
55 /// computed by
56 ///
57 /// ```text
58 /// 0.5 * (d(A, X) + d(B, X))
59 /// ```
60 ///
61 /// where `A` and `B` correspond to the clusters that merged to create
62 /// `AB`.
63 Weighted,
64 /// Assigns the Ward dissimilarity between clusters.
65 ///
66 /// Specifically, if `AB` is a newly merged cluster and `X` is every other
67 /// cluster, then the pairwise dissimilarity between `AB` and `X` is
68 /// computed by
69 ///
70 /// ```text
71 /// let t1 = d(A, X)^2 * (|A| + |X|);
72 /// let t2 = d(B, X)^2 * (|B| + |X|);
73 /// let t3 = d(A, B)^2 * |X|;
74 /// let T = |A| + |B| + |X|;
75 /// sqrt(t1/T + t2/T + t3/T)
76 /// ```
77 ///
78 /// where `A` and `B` correspond to the clusters that merged to create
79 /// `AB`.
80 Ward,
81 /// Assigns the centroid dissimilarity between clusters.
82 ///
83 /// Specifically, if `AB` is a newly merged cluster and `X` is every other
84 /// cluster, then the pairwise dissimilarity between `AB` and `X` is
85 /// computed by
86 ///
87 /// ```text
88 /// let t1 = |A| * d(A, X) + |B| * d(B, X));
89 /// let t2 = |A| * |B| * d(A, B);
90 /// let size = |A| + |B|;
91 /// sqrt(t1/size + t2/size^2)
92 /// ```
93 ///
94 /// where `A` and `B` correspond to the clusters that merged to create
95 /// `AB`.
96 Centroid,
97 /// Assigns the median dissimilarity between clusters.
98 ///
99 /// Specifically, if `AB` is a newly merged cluster and `X` is every other
100 /// cluster, then the pairwise dissimilarity between `AB` and `X` is
101 /// computed by
102 ///
103 /// ```text
104 /// sqrt(d(A, X)/2 + d(B, X)/2 - d(A, B)/4)
105 /// ```
106 ///
107 /// where `A` and `B` correspond to the clusters that merged to create
108 /// `AB`.
109 Median,
110}