Skip to main content

rust_igraph/algorithms/community/
split_join_distance.rs

1//! ALGO-CM-016 — `split_join_distance` (asymmetric projection distances).
2//!
3//! Given two membership vectors over the same vertex set, returns the
4//! **pair** of projection distances `(d12, d21)` where:
5//!
6//! - `d12 = n − Σ_i max_j |R_i ∩ C_j|` (project `comm1` onto `comm2`)
7//! - `d21 = n − Σ_j max_i |R_i ∩ C_j|` (project `comm2` onto `comm1`)
8//!
9//! The total split-join distance (van Dongen 2000) is `d12 + d21`. One
10//! component being zero means one partition is a *sub-partition* of the
11//! other; both being zero means the partitions are identical.
12//!
13//! Mirrors `igraph_split_join_distance` in
14//! `references/igraph/src/community/community_misc.c`. The C reference
15//! exposes the asymmetric pair specifically so callers can detect the
16//! sub-partition relationship — [`crate::compare_communities`] with
17//! [`crate::CommunityComparison::SplitJoin`] returns the sum.
18//!
19//! Reuses the densification (via [`crate::reindex_membership`]) and
20//! sparse-confusion-matrix machinery from CM-015 — both inputs are
21//! reindexed once, then a single `HashMap<(u32, u32), u32>` pass yields
22//! the row- and column-max sums.
23
24#![allow(
25    clippy::cast_precision_loss,
26    clippy::cast_possible_truncation,
27    clippy::cast_sign_loss
28)]
29
30use crate::core::error::{IgraphError, IgraphResult};
31
32use super::compare_communities::split_join_distances;
33use super::reindex_membership::reindex_membership;
34
35/// Asymmetric split-join distances between two community partitions.
36///
37/// `d12` is the projection distance of `comm1` from `comm2`; `d21` is
38/// the projection distance in the other direction. The (symmetric)
39/// split-join distance is [`SplitJoinDistance::total`] = `d12 + d21`.
40#[derive(Debug, Copy, Clone, Eq, PartialEq)]
41pub struct SplitJoinDistance {
42    /// Projection distance of `comm1` from `comm2`.
43    pub d12: u64,
44    /// Projection distance of `comm2` from `comm1`.
45    pub d21: u64,
46}
47
48impl SplitJoinDistance {
49    /// Total (symmetric) split-join distance, `d12 + d21`.
50    #[must_use]
51    pub fn total(self) -> u64 {
52        self.d12 + self.d21
53    }
54}
55
56/// Compute the two asymmetric projection distances between `comm1` and
57/// `comm2`.
58///
59/// Both vectors must have the same length. Cluster ids do not have to
60/// be densified — they are reindexed internally via
61/// [`crate::reindex_membership`]. Empty inputs return `(0, 0)`.
62///
63/// # Examples
64/// ```
65/// use rust_igraph::split_join_distance;
66///
67/// // comm1 is a sub-partition of comm2: every cluster in comm1 fits
68/// // entirely inside some cluster of comm2, so d12 == 0.
69/// let r = split_join_distance(&[0, 0, 1, 2], &[0, 0, 0, 1]).unwrap();
70/// assert_eq!(r.d12, 0);
71/// assert_eq!(r.d21, 1);
72/// assert_eq!(r.total(), 1);
73///
74/// // Identical partitions: both distances are 0.
75/// let r = split_join_distance(&[0, 0, 1, 1], &[5, 5, 7, 7]).unwrap();
76/// assert_eq!(r.d12, 0);
77/// assert_eq!(r.d21, 0);
78/// ```
79///
80/// # Errors
81/// - [`IgraphError::InvalidArgument`] if `comm1.len() != comm2.len()`.
82pub fn split_join_distance(comm1: &[u32], comm2: &[u32]) -> IgraphResult<SplitJoinDistance> {
83    if comm1.len() != comm2.len() {
84        return Err(IgraphError::InvalidArgument(format!(
85            "community membership vectors have different lengths: {} and {}",
86            comm1.len(),
87            comm2.len(),
88        )));
89    }
90    let n = comm1.len();
91    if n == 0 {
92        return Ok(SplitJoinDistance { d12: 0, d21: 0 });
93    }
94    let c1 = reindex_membership(comm1)?;
95    let c2 = reindex_membership(comm2)?;
96    let (d12, d21) = split_join_distances(&c1.membership, &c2.membership, n);
97    Ok(SplitJoinDistance { d12, d21 })
98}
99
100#[cfg(test)]
101mod tests {
102    use super::*;
103
104    #[test]
105    fn empty_inputs_yield_zero_zero() {
106        let r = split_join_distance(&[], &[]).unwrap();
107        assert_eq!(r.d12, 0);
108        assert_eq!(r.d21, 0);
109        assert_eq!(r.total(), 0);
110    }
111
112    #[test]
113    fn length_mismatch_errors() {
114        let err = split_join_distance(&[0, 0], &[0, 0, 0]).unwrap_err();
115        match err {
116            IgraphError::InvalidArgument(_) => (),
117            other => panic!("expected InvalidArgument, got {other:?}"),
118        }
119    }
120
121    #[test]
122    fn identical_partition_is_zero() {
123        let r = split_join_distance(&[0, 0, 1, 1, 2, 2], &[3, 3, 5, 5, 7, 7]).unwrap();
124        assert_eq!(r.d12, 0);
125        assert_eq!(r.d21, 0);
126    }
127
128    #[test]
129    fn subpartition_asymmetry() {
130        // comm1 = {{0,1},{2},{3}} is a sub-partition of comm2 = {{0,1,2},{3}}.
131        // Every comm1 cluster fits inside a comm2 cluster ⇒ d12 = 0.
132        // In the reverse direction: comm2[0]={0,1,2} overlaps comm1
133        // clusters with sizes (2, 1) → max=2 → contributes 2; comm2[1]={3}
134        // → contributes 1; sum = 3, n = 4 ⇒ d21 = 1.
135        let r = split_join_distance(&[0, 0, 1, 2], &[0, 0, 0, 1]).unwrap();
136        assert_eq!(r.d12, 0);
137        assert_eq!(r.d21, 1);
138        assert_eq!(r.total(), 1);
139    }
140
141    #[test]
142    fn full_disagreement_2x2() {
143        // Canonical worst case for n=4: [0,0,1,1] vs [0,1,0,1].
144        // Each comm1 cluster overlaps comm2 clusters by (1, 1) → max=1
145        // each → row sum = 2 → d12 = 4 - 2 = 2. Same for d21 by symmetry.
146        // Total = 4 (the known SplitJoin from CM-015 unit tests).
147        let r = split_join_distance(&[0, 0, 1, 1], &[0, 1, 0, 1]).unwrap();
148        assert_eq!(r.d12, 2);
149        assert_eq!(r.d21, 2);
150        assert_eq!(r.total(), 4);
151    }
152
153    #[test]
154    fn matches_compare_communities_split_join_sum() {
155        use crate::{CommunityComparison, compare_communities};
156        let a = [0u32, 0, 1, 2, 2, 3, 3, 3];
157        let b = [0u32, 1, 1, 2, 3, 3, 0, 2];
158        let r = split_join_distance(&a, &b).unwrap();
159        let total = compare_communities(&a, &b, CommunityComparison::SplitJoin).unwrap();
160        assert!((total - (r.total() as f64)).abs() < 1e-12);
161    }
162
163    #[test]
164    fn relabel_invariant() {
165        // Distances depend only on the partition, not the cluster ids.
166        let a = [0u32, 0, 1, 1, 2, 2];
167        let b = [0u32, 1, 0, 1, 0, 1];
168        let r1 = split_join_distance(&a, &b).unwrap();
169        // Relabel a: 0 -> 17, 1 -> 4, 2 -> 999. Relabel b: 0 -> 1_000_000, 1 -> 5.
170        let a2: Vec<u32> = a
171            .iter()
172            .map(|&c| match c {
173                0 => 17,
174                1 => 4,
175                _ => 999,
176            })
177            .collect();
178        let b2: Vec<u32> = b
179            .iter()
180            .map(|&c| if c == 0 { 1_000_000 } else { 5 })
181            .collect();
182        let r2 = split_join_distance(&a2, &b2).unwrap();
183        assert_eq!(r1, r2);
184    }
185
186    #[test]
187    fn single_singleton_block() {
188        // n=5, both partitions all singletons. Every cluster maps 1:1 ⇒
189        // d12 = d21 = 0.
190        let v: Vec<u32> = (0..5).collect();
191        let r = split_join_distance(&v, &v).unwrap();
192        assert_eq!(r.d12, 0);
193        assert_eq!(r.d21, 0);
194    }
195}
196
197#[cfg(all(test, feature = "proptest-harness"))]
198mod proptests {
199    use super::*;
200    use proptest::collection::vec;
201    use proptest::prelude::*;
202
203    proptest! {
204        #[test]
205        fn non_negative_components(
206            (a, b) in (1usize..40).prop_flat_map(|n| (vec(0u32..5, n), vec(0u32..5, n)))
207        ) {
208            let r = split_join_distance(&a, &b).unwrap();
209            // d12, d21 ∈ [0, n] always.
210            let n = a.len() as u64;
211            prop_assert!(r.d12 <= n);
212            prop_assert!(r.d21 <= n);
213        }
214
215        #[test]
216        fn total_equals_compare_communities_split_join(
217            (a, b) in (1usize..40).prop_flat_map(|n| (vec(0u32..5, n), vec(0u32..5, n)))
218        ) {
219            use crate::{CommunityComparison, compare_communities};
220            let r = split_join_distance(&a, &b).unwrap();
221            let total = compare_communities(&a, &b, CommunityComparison::SplitJoin).unwrap();
222            prop_assert!((total - (r.total() as f64)).abs() < 1e-12);
223        }
224
225        #[test]
226        fn relabel_invariant_prop(
227            (a, b) in (2usize..30).prop_flat_map(|n| (vec(0u32..6, n), vec(0u32..6, n))),
228            offset in 1u32..1000,
229        ) {
230            let r1 = split_join_distance(&a, &b).unwrap();
231            let a2: Vec<u32> = a.iter().map(|&c| c.wrapping_add(offset).wrapping_mul(31)).collect();
232            let b2: Vec<u32> = b.iter().map(|&c| c.wrapping_add(offset.wrapping_mul(7))).collect();
233            let r2 = split_join_distance(&a2, &b2).unwrap();
234            prop_assert_eq!(r1, r2);
235        }
236
237        #[test]
238        fn identical_partitions_are_zero(
239            a in vec(0u32..7, 1..40),
240        ) {
241            let r = split_join_distance(&a, &a).unwrap();
242            prop_assert_eq!(r.d12, 0);
243            prop_assert_eq!(r.d21, 0);
244        }
245
246        #[test]
247        fn subpartition_one_side_is_zero(
248            a in vec(0u32..4, 2..25),
249        ) {
250            // Coarsen `a` into `b` by collapsing every cluster c -> c/2.
251            // The result `b` is strictly coarser than `a`, so `a` is a
252            // sub-partition of `b` and d12 = 0 (project a -> b: every
253            // a-cluster fits entirely in one b-cluster).
254            let b: Vec<u32> = a.iter().map(|&c| c / 2).collect();
255            let r = split_join_distance(&a, &b).unwrap();
256            prop_assert_eq!(r.d12, 0);
257        }
258    }
259}