rust_igraph/algorithms/community/
reindex_membership.rs1use std::collections::BTreeMap;
23
24use crate::core::error::IgraphResult;
25
26#[derive(Debug, Clone, PartialEq, Eq)]
31pub struct ReindexMembershipResult {
32 pub membership: Vec<u32>,
35 pub new_to_old: Vec<u32>,
38}
39
40impl ReindexMembershipResult {
41 #[must_use]
43 pub fn nb_clusters(&self) -> u32 {
44 u32::try_from(self.new_to_old.len()).unwrap_or(u32::MAX)
48 }
49}
50
51pub fn reindex_membership(membership: &[u32]) -> IgraphResult<ReindexMembershipResult> {
74 let n = membership.len();
75
76 if n == 0 {
78 return Ok(ReindexMembershipResult {
79 membership: Vec::new(),
80 new_to_old: Vec::new(),
81 });
82 }
83
84 let mut max_id: u32 = 0;
88 for &c in membership {
89 if c > max_id {
90 max_id = c;
91 }
92 }
93
94 if (max_id as usize) < n {
95 let mut lookup: Vec<u32> = vec![0; n];
96 let mut new_to_old: Vec<u32> = Vec::new();
97 let mut out: Vec<u32> = vec![0; n];
98 let mut next_new: u32 = 0;
99
100 for (i, &old) in membership.iter().enumerate() {
101 let slot = &mut lookup[old as usize];
102 if *slot == 0 {
103 *slot = next_new + 1;
104 new_to_old.push(old);
105 next_new = next_new.saturating_add(1);
106 }
107 out[i] = *slot - 1;
108 }
109
110 return Ok(ReindexMembershipResult {
111 membership: out,
112 new_to_old,
113 });
114 }
115
116 let mut lookup: BTreeMap<u32, u32> = BTreeMap::new();
118 let mut new_to_old: Vec<u32> = Vec::new();
119 let mut out: Vec<u32> = vec![0; n];
120 let mut next_new: u32 = 0;
121
122 for (i, &old) in membership.iter().enumerate() {
123 let new_id = if let Some(&nid) = lookup.get(&old) {
124 nid
125 } else {
126 let nid = next_new;
127 lookup.insert(old, nid);
128 new_to_old.push(old);
129 next_new = next_new.saturating_add(1);
130 nid
131 };
132 out[i] = new_id;
133 }
134
135 Ok(ReindexMembershipResult {
136 membership: out,
137 new_to_old,
138 })
139}
140
141#[cfg(test)]
142mod tests {
143 use super::*;
144
145 #[test]
146 fn empty_membership() {
147 let r = reindex_membership(&[]).unwrap();
148 assert!(r.membership.is_empty());
149 assert!(r.new_to_old.is_empty());
150 assert_eq!(r.nb_clusters(), 0);
151 }
152
153 #[test]
154 fn already_densified_identity() {
155 let r = reindex_membership(&[0, 1, 2, 0, 1, 2]).unwrap();
156 assert_eq!(r.membership, vec![0, 1, 2, 0, 1, 2]);
157 assert_eq!(r.new_to_old, vec![0, 1, 2]);
158 }
159
160 #[test]
161 fn singleton_cluster_collapses_to_zero() {
162 let r = reindex_membership(&[42, 42, 42, 42]).unwrap();
163 assert_eq!(r.membership, vec![0, 0, 0, 0]);
164 assert_eq!(r.new_to_old, vec![42]);
165 assert_eq!(r.nb_clusters(), 1);
166 }
167
168 #[test]
169 fn gaps_are_compressed() {
170 let r = reindex_membership(&[5, 5, 10, 5, 0]).unwrap();
172 assert_eq!(r.membership, vec![0, 0, 1, 0, 2]);
173 assert_eq!(r.new_to_old, vec![5, 10, 0]);
174 }
175
176 #[test]
177 fn reverse_order_relabels_to_descending_in_new_to_old() {
178 let r = reindex_membership(&[2, 1, 0]).unwrap();
181 assert_eq!(r.membership, vec![0, 1, 2]);
182 assert_eq!(r.new_to_old, vec![2, 1, 0]);
183 }
184
185 #[test]
186 fn singletons() {
187 let r = reindex_membership(&[7, 8, 9, 10]).unwrap();
188 assert_eq!(r.membership, vec![0, 1, 2, 3]);
189 assert_eq!(r.new_to_old, vec![7, 8, 9, 10]);
190 }
191
192 #[test]
193 fn large_id_takes_btreemap_fallback() {
194 let r = reindex_membership(&[1_000_000, 7, 1_000_000, 7]).unwrap();
196 assert_eq!(r.membership, vec![0, 1, 0, 1]);
197 assert_eq!(r.new_to_old, vec![1_000_000, 7]);
198 }
199
200 #[test]
201 fn fast_path_max_id_equals_n_minus_1() {
202 let r = reindex_membership(&[3, 0, 1, 2]).unwrap();
204 assert_eq!(r.membership, vec![0, 1, 2, 3]);
205 assert_eq!(r.new_to_old, vec![3, 0, 1, 2]);
206 }
207
208 #[test]
209 fn fast_path_max_id_equals_n_takes_sparse_branch() {
210 let r = reindex_membership(&[4, 0, 1, 2]).unwrap();
213 assert_eq!(r.membership, vec![0, 1, 2, 3]);
214 assert_eq!(r.new_to_old, vec![4, 0, 1, 2]);
215 }
216
217 #[test]
218 fn preserves_partition_under_relabel() {
219 let inputs: &[&[u32]] = &[
222 &[0, 0, 1, 2, 1, 2, 0],
223 &[5, 5, 1_000, 7, 1_000, 7, 5],
224 &[u32::MAX, 0, u32::MAX, 1, 0],
225 ];
226 for input in inputs {
227 let r = reindex_membership(input).unwrap();
228 for i in 0..input.len() {
229 for j in 0..input.len() {
230 assert_eq!(
231 input[i] == input[j],
232 r.membership[i] == r.membership[j],
233 "partition diverged at ({i}, {j}) for input {input:?}",
234 );
235 }
236 }
237 let k = r.nb_clusters();
239 for &c in &r.membership {
240 assert!(c < k);
241 }
242 for c in 0..k {
243 assert!(r.membership.contains(&c));
244 }
245 assert_eq!(r.new_to_old.len(), k as usize);
246 }
247 }
248
249 #[test]
250 fn fast_path_matches_sparse_path_for_in_range_ids() {
251 let input: Vec<u32> = vec![3, 0, 3, 1, 2, 0, 1];
256 let fast = reindex_membership(&input).unwrap();
257 let len_u32 = u32::try_from(input.len()).expect("test input length fits u32");
258 assert!(input.iter().copied().max().unwrap_or(0) < len_u32);
259 let mut expected_new_to_old: Vec<u32> = Vec::new();
262 let mut expected: Vec<u32> = Vec::with_capacity(input.len());
263 for &old in &input {
264 let pos = expected_new_to_old
265 .iter()
266 .position(|&x| x == old)
267 .unwrap_or_else(|| {
268 expected_new_to_old.push(old);
269 expected_new_to_old.len() - 1
270 });
271 expected.push(u32::try_from(pos).expect("test bookkeeping fits u32"));
272 }
273 assert_eq!(fast.membership, expected);
274 assert_eq!(fast.new_to_old, expected_new_to_old);
275 }
276
277 #[cfg(all(test, feature = "proptest-harness"))]
278 mod prop {
279 use super::*;
280 use proptest::prelude::*;
281
282 prop_compose! {
283 fn arb_membership()(
284 n in 1usize..=64,
285 seed in any::<u64>(),
286 max_id_kind in 0u32..3,
287 ) -> Vec<u32> {
288 let mut rng: u64 = seed.wrapping_add(0xA5A5_5A5A_DEAD_BEEF);
289 let max_id: u32 = match max_id_kind {
290 0 => (n as u32).saturating_sub(1).max(1), 1 => (n as u32).saturating_mul(4).max(1), _ => 1_000_000, };
294 (0..n).map(|_| {
295 rng = rng.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1);
296 (rng >> 32) as u32 % (max_id + 1)
297 }).collect()
298 }
299 }
300
301 proptest! {
302 #![proptest_config(ProptestConfig { cases: 80, ..ProptestConfig::default() })]
303
304 #[test]
305 fn partition_preserved(input in arb_membership()) {
306 let r = reindex_membership(&input).unwrap();
307 prop_assert_eq!(r.membership.len(), input.len());
308 for i in 0..input.len() {
309 for j in (i + 1)..input.len() {
310 prop_assert_eq!(
311 input[i] == input[j],
312 r.membership[i] == r.membership[j],
313 );
314 }
315 }
316 }
317
318 #[test]
319 fn ids_are_contiguous(input in arb_membership()) {
320 let r = reindex_membership(&input).unwrap();
321 let k = r.nb_clusters();
322 for &c in &r.membership {
323 prop_assert!(c < k);
324 }
325 for c in 0..k {
327 prop_assert!(r.membership.contains(&c));
328 }
329 }
330
331 #[test]
332 fn new_to_old_round_trips(input in arb_membership()) {
333 let r = reindex_membership(&input).unwrap();
334 prop_assert_eq!(r.new_to_old.len(), r.nb_clusters() as usize);
335 for (i, &old) in input.iter().enumerate() {
336 let new = r.membership[i] as usize;
337 prop_assert_eq!(r.new_to_old[new], old);
338 }
339 }
340
341 #[test]
342 fn idempotent_when_already_dense(input in arb_membership()) {
343 let r1 = reindex_membership(&input).unwrap();
344 let r2 = reindex_membership(&r1.membership).unwrap();
345 prop_assert_eq!(r1.membership.clone(), r2.membership);
346 let expected_n2o: Vec<u32> = (0..r1.nb_clusters()).collect();
350 prop_assert_eq!(r2.new_to_old, expected_n2o);
351 }
352 }
353 }
354}