Skip to main content

rust_igraph/algorithms/community/
reindex_membership.rs

1//! ALGO-CM-014 — `reindex_membership` (densify membership labels).
2//!
3//! Take any `membership[v] = c` vector with arbitrary cluster ids
4//! (including gaps and negatives in the C version's `int` typing —
5//! here, any `u32`) and produce a new, densified `0..k-1` labelling
6//! such that the *partition* is identical. Optionally also report the
7//! old cluster id that each new id replaced, in encounter order.
8//!
9//! Mirrors `igraph_reindex_membership` /
10//! `igraph_i_reindex_membership_large` in
11//! `references/igraph/src/community/community_misc.c`.
12//!
13//! The C implementation has a fast path when every id is already in
14//! `0..n`, and a sort-based fallback otherwise. Because Rust's
15//! `Vec<u32>` already has cheap key-set operations via `HashMap`, we
16//! use a single `BTreeMap`-free first-occurrence scan: O(n) when the
17//! id space fits in a `Vec` (length `max_id + 1` is bounded by `n`),
18//! and `O(n log k)` via `BTreeMap` otherwise. Both branches preserve
19//! the *encounter order* — the new id of an old id is the index where
20//! it was first seen.
21
22use std::collections::BTreeMap;
23
24use crate::core::error::IgraphResult;
25
26/// Result of [`reindex_membership`]: a densified membership vector
27/// `[0, k)`, the old cluster id behind each new id (so `new_to_old[i]`
28/// is the original id that now maps to `i`), and the number of
29/// distinct clusters `k = new_to_old.len()`.
30#[derive(Debug, Clone, PartialEq, Eq)]
31pub struct ReindexMembershipResult {
32    /// `membership[v] ∈ [0, k)` — the densified cluster id of vertex
33    /// `v`.
34    pub membership: Vec<u32>,
35    /// `new_to_old[i]` — the original cluster id that vertex
36    /// encounters with this id now map to. `nb_clusters = new_to_old.len()`.
37    pub new_to_old: Vec<u32>,
38}
39
40impl ReindexMembershipResult {
41    /// Number of distinct clusters in the densified membership.
42    #[must_use]
43    pub fn nb_clusters(&self) -> u32 {
44        // `new_to_old` cannot exceed `membership.len()`, which is a
45        // `Vec` length bounded by `isize::MAX`. The cast is safe for
46        // any practical input.
47        u32::try_from(self.new_to_old.len()).unwrap_or(u32::MAX)
48    }
49}
50
51/// Relabel a membership vector so cluster ids run contiguously over
52/// `0..k`, where `k` is the number of distinct clusters. The *partition*
53/// is unchanged: two vertices share a cluster in the output iff they
54/// shared one in the input. New ids are assigned in *first-occurrence
55/// order* — the first cluster id you encounter when sweeping left to
56/// right becomes `0`, the next new one becomes `1`, and so on.
57///
58/// # Examples
59/// ```
60/// use rust_igraph::reindex_membership;
61///
62/// // Gaps and out-of-order ids are densified by first occurrence.
63/// let r = reindex_membership(&[7, 7, 3, 7, 9, 3]).unwrap();
64/// assert_eq!(r.membership, vec![0, 0, 1, 0, 2, 1]);
65/// assert_eq!(r.new_to_old, vec![7, 3, 9]);
66/// assert_eq!(r.nb_clusters(), 3);
67/// ```
68///
69/// # Errors
70/// Currently infallible — returns `IgraphResult` for API symmetry with
71/// other community helpers and to allow adding fallible checks later
72/// without a breaking change.
73pub fn reindex_membership(membership: &[u32]) -> IgraphResult<ReindexMembershipResult> {
74    let n = membership.len();
75
76    // Empty input is the trivial 0-cluster case.
77    if n == 0 {
78        return Ok(ReindexMembershipResult {
79            membership: Vec::new(),
80            new_to_old: Vec::new(),
81        });
82    }
83
84    // Mirror the C implementation: fast path when every id is in
85    // `0..n` — we can use a flat `Vec<u32>` of size `n` as the lookup,
86    // where `lookup[old] = new + 1` and 0 means "not yet seen".
87    let mut max_id: u32 = 0;
88    for &c in membership {
89        if c > max_id {
90            max_id = c;
91        }
92    }
93
94    if (max_id as usize) < n {
95        let mut lookup: Vec<u32> = vec![0; n];
96        let mut new_to_old: Vec<u32> = Vec::new();
97        let mut out: Vec<u32> = vec![0; n];
98        let mut next_new: u32 = 0;
99
100        for (i, &old) in membership.iter().enumerate() {
101            let slot = &mut lookup[old as usize];
102            if *slot == 0 {
103                *slot = next_new + 1;
104                new_to_old.push(old);
105                next_new = next_new.saturating_add(1);
106            }
107            out[i] = *slot - 1;
108        }
109
110        return Ok(ReindexMembershipResult {
111            membership: out,
112            new_to_old,
113        });
114    }
115
116    // Sparse / large-id fallback: BTreeMap keyed by old cluster id.
117    let mut lookup: BTreeMap<u32, u32> = BTreeMap::new();
118    let mut new_to_old: Vec<u32> = Vec::new();
119    let mut out: Vec<u32> = vec![0; n];
120    let mut next_new: u32 = 0;
121
122    for (i, &old) in membership.iter().enumerate() {
123        let new_id = if let Some(&nid) = lookup.get(&old) {
124            nid
125        } else {
126            let nid = next_new;
127            lookup.insert(old, nid);
128            new_to_old.push(old);
129            next_new = next_new.saturating_add(1);
130            nid
131        };
132        out[i] = new_id;
133    }
134
135    Ok(ReindexMembershipResult {
136        membership: out,
137        new_to_old,
138    })
139}
140
141#[cfg(test)]
142mod tests {
143    use super::*;
144
145    #[test]
146    fn empty_membership() {
147        let r = reindex_membership(&[]).unwrap();
148        assert!(r.membership.is_empty());
149        assert!(r.new_to_old.is_empty());
150        assert_eq!(r.nb_clusters(), 0);
151    }
152
153    #[test]
154    fn already_densified_identity() {
155        let r = reindex_membership(&[0, 1, 2, 0, 1, 2]).unwrap();
156        assert_eq!(r.membership, vec![0, 1, 2, 0, 1, 2]);
157        assert_eq!(r.new_to_old, vec![0, 1, 2]);
158    }
159
160    #[test]
161    fn singleton_cluster_collapses_to_zero() {
162        let r = reindex_membership(&[42, 42, 42, 42]).unwrap();
163        assert_eq!(r.membership, vec![0, 0, 0, 0]);
164        assert_eq!(r.new_to_old, vec![42]);
165        assert_eq!(r.nb_clusters(), 1);
166    }
167
168    #[test]
169    fn gaps_are_compressed() {
170        // Old ids: 5, 5, 10, 5, 0 -> first-encounter: 5 -> 0, 10 -> 1, 0 -> 2.
171        let r = reindex_membership(&[5, 5, 10, 5, 0]).unwrap();
172        assert_eq!(r.membership, vec![0, 0, 1, 0, 2]);
173        assert_eq!(r.new_to_old, vec![5, 10, 0]);
174    }
175
176    #[test]
177    fn reverse_order_relabels_to_descending_in_new_to_old() {
178        // 3 vertices, ids 2, 1, 0. New ids assigned in first-encounter:
179        // 2 -> 0, 1 -> 1, 0 -> 2.
180        let r = reindex_membership(&[2, 1, 0]).unwrap();
181        assert_eq!(r.membership, vec![0, 1, 2]);
182        assert_eq!(r.new_to_old, vec![2, 1, 0]);
183    }
184
185    #[test]
186    fn singletons() {
187        let r = reindex_membership(&[7, 8, 9, 10]).unwrap();
188        assert_eq!(r.membership, vec![0, 1, 2, 3]);
189        assert_eq!(r.new_to_old, vec![7, 8, 9, 10]);
190    }
191
192    #[test]
193    fn large_id_takes_btreemap_fallback() {
194        // max_id (1_000_000) >> n (4), forces the sparse branch.
195        let r = reindex_membership(&[1_000_000, 7, 1_000_000, 7]).unwrap();
196        assert_eq!(r.membership, vec![0, 1, 0, 1]);
197        assert_eq!(r.new_to_old, vec![1_000_000, 7]);
198    }
199
200    #[test]
201    fn fast_path_max_id_equals_n_minus_1() {
202        // Edge of the fast-path branch: max_id == n - 1.
203        let r = reindex_membership(&[3, 0, 1, 2]).unwrap();
204        assert_eq!(r.membership, vec![0, 1, 2, 3]);
205        assert_eq!(r.new_to_old, vec![3, 0, 1, 2]);
206    }
207
208    #[test]
209    fn fast_path_max_id_equals_n_takes_sparse_branch() {
210        // max_id (4) == n (4), so max_id < n is false -> sparse path.
211        // The output is still correct.
212        let r = reindex_membership(&[4, 0, 1, 2]).unwrap();
213        assert_eq!(r.membership, vec![0, 1, 2, 3]);
214        assert_eq!(r.new_to_old, vec![4, 0, 1, 2]);
215    }
216
217    #[test]
218    fn preserves_partition_under_relabel() {
219        // Two vertices share a cluster in the input iff they share one
220        // in the output, regardless of label values.
221        let inputs: &[&[u32]] = &[
222            &[0, 0, 1, 2, 1, 2, 0],
223            &[5, 5, 1_000, 7, 1_000, 7, 5],
224            &[u32::MAX, 0, u32::MAX, 1, 0],
225        ];
226        for input in inputs {
227            let r = reindex_membership(input).unwrap();
228            for i in 0..input.len() {
229                for j in 0..input.len() {
230                    assert_eq!(
231                        input[i] == input[j],
232                        r.membership[i] == r.membership[j],
233                        "partition diverged at ({i}, {j}) for input {input:?}",
234                    );
235                }
236            }
237            // Densified labels really do run 0..k.
238            let k = r.nb_clusters();
239            for &c in &r.membership {
240                assert!(c < k);
241            }
242            for c in 0..k {
243                assert!(r.membership.contains(&c));
244            }
245            assert_eq!(r.new_to_old.len(), k as usize);
246        }
247    }
248
249    #[test]
250    fn fast_path_matches_sparse_path_for_in_range_ids() {
251        // Same input under both branches should yield the same result.
252        // The fast path triggers when max_id < n. We can force the
253        // sparse path by appending an out-of-range tail value, then
254        // truncating the result, and comparing the in-range prefix.
255        let input: Vec<u32> = vec![3, 0, 3, 1, 2, 0, 1];
256        let fast = reindex_membership(&input).unwrap();
257        let len_u32 = u32::try_from(input.len()).expect("test input length fits u32");
258        assert!(input.iter().copied().max().unwrap_or(0) < len_u32);
259        // Build a "sparse-branch" check by manually constructing the
260        // expected first-occurrence mapping and comparing.
261        let mut expected_new_to_old: Vec<u32> = Vec::new();
262        let mut expected: Vec<u32> = Vec::with_capacity(input.len());
263        for &old in &input {
264            let pos = expected_new_to_old
265                .iter()
266                .position(|&x| x == old)
267                .unwrap_or_else(|| {
268                    expected_new_to_old.push(old);
269                    expected_new_to_old.len() - 1
270                });
271            expected.push(u32::try_from(pos).expect("test bookkeeping fits u32"));
272        }
273        assert_eq!(fast.membership, expected);
274        assert_eq!(fast.new_to_old, expected_new_to_old);
275    }
276
277    #[cfg(all(test, feature = "proptest-harness"))]
278    mod prop {
279        use super::*;
280        use proptest::prelude::*;
281
282        prop_compose! {
283            fn arb_membership()(
284                n in 1usize..=64,
285                seed in any::<u64>(),
286                max_id_kind in 0u32..3,
287            ) -> Vec<u32> {
288                let mut rng: u64 = seed.wrapping_add(0xA5A5_5A5A_DEAD_BEEF);
289                let max_id: u32 = match max_id_kind {
290                    0 => (n as u32).saturating_sub(1).max(1),       // fast path
291                    1 => (n as u32).saturating_mul(4).max(1),       // straddles boundary
292                    _ => 1_000_000,                                  // sparse path
293                };
294                (0..n).map(|_| {
295                    rng = rng.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1);
296                    (rng >> 32) as u32 % (max_id + 1)
297                }).collect()
298            }
299        }
300
301        proptest! {
302            #![proptest_config(ProptestConfig { cases: 80, ..ProptestConfig::default() })]
303
304            #[test]
305            fn partition_preserved(input in arb_membership()) {
306                let r = reindex_membership(&input).unwrap();
307                prop_assert_eq!(r.membership.len(), input.len());
308                for i in 0..input.len() {
309                    for j in (i + 1)..input.len() {
310                        prop_assert_eq!(
311                            input[i] == input[j],
312                            r.membership[i] == r.membership[j],
313                        );
314                    }
315                }
316            }
317
318            #[test]
319            fn ids_are_contiguous(input in arb_membership()) {
320                let r = reindex_membership(&input).unwrap();
321                let k = r.nb_clusters();
322                for &c in &r.membership {
323                    prop_assert!(c < k);
324                }
325                // Every id in 0..k appears at least once.
326                for c in 0..k {
327                    prop_assert!(r.membership.contains(&c));
328                }
329            }
330
331            #[test]
332            fn new_to_old_round_trips(input in arb_membership()) {
333                let r = reindex_membership(&input).unwrap();
334                prop_assert_eq!(r.new_to_old.len(), r.nb_clusters() as usize);
335                for (i, &old) in input.iter().enumerate() {
336                    let new = r.membership[i] as usize;
337                    prop_assert_eq!(r.new_to_old[new], old);
338                }
339            }
340
341            #[test]
342            fn idempotent_when_already_dense(input in arb_membership()) {
343                let r1 = reindex_membership(&input).unwrap();
344                let r2 = reindex_membership(&r1.membership).unwrap();
345                prop_assert_eq!(r1.membership.clone(), r2.membership);
346                // After one pass, new_to_old of the second call is just
347                // (0..k) since the input is already 0..k in first-occurrence
348                // order.
349                let expected_n2o: Vec<u32> = (0..r1.nb_clusters()).collect();
350                prop_assert_eq!(r2.new_to_old, expected_n2o);
351            }
352        }
353    }
354}