use std::collections::BTreeMap;
use crate::core::error::IgraphResult;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReindexMembershipResult {
pub membership: Vec<u32>,
pub new_to_old: Vec<u32>,
}
impl ReindexMembershipResult {
#[must_use]
pub fn nb_clusters(&self) -> u32 {
u32::try_from(self.new_to_old.len()).unwrap_or(u32::MAX)
}
}
pub fn reindex_membership(membership: &[u32]) -> IgraphResult<ReindexMembershipResult> {
let n = membership.len();
if n == 0 {
return Ok(ReindexMembershipResult {
membership: Vec::new(),
new_to_old: Vec::new(),
});
}
let mut max_id: u32 = 0;
for &c in membership {
if c > max_id {
max_id = c;
}
}
if (max_id as usize) < n {
let mut lookup: Vec<u32> = vec![0; n];
let mut new_to_old: Vec<u32> = Vec::new();
let mut out: Vec<u32> = vec![0; n];
let mut next_new: u32 = 0;
for (i, &old) in membership.iter().enumerate() {
let slot = &mut lookup[old as usize];
if *slot == 0 {
*slot = next_new + 1;
new_to_old.push(old);
next_new = next_new.saturating_add(1);
}
out[i] = *slot - 1;
}
return Ok(ReindexMembershipResult {
membership: out,
new_to_old,
});
}
let mut lookup: BTreeMap<u32, u32> = BTreeMap::new();
let mut new_to_old: Vec<u32> = Vec::new();
let mut out: Vec<u32> = vec![0; n];
let mut next_new: u32 = 0;
for (i, &old) in membership.iter().enumerate() {
let new_id = if let Some(&nid) = lookup.get(&old) {
nid
} else {
let nid = next_new;
lookup.insert(old, nid);
new_to_old.push(old);
next_new = next_new.saturating_add(1);
nid
};
out[i] = new_id;
}
Ok(ReindexMembershipResult {
membership: out,
new_to_old,
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn empty_membership() {
let r = reindex_membership(&[]).unwrap();
assert!(r.membership.is_empty());
assert!(r.new_to_old.is_empty());
assert_eq!(r.nb_clusters(), 0);
}
#[test]
fn already_densified_identity() {
let r = reindex_membership(&[0, 1, 2, 0, 1, 2]).unwrap();
assert_eq!(r.membership, vec![0, 1, 2, 0, 1, 2]);
assert_eq!(r.new_to_old, vec![0, 1, 2]);
}
#[test]
fn singleton_cluster_collapses_to_zero() {
let r = reindex_membership(&[42, 42, 42, 42]).unwrap();
assert_eq!(r.membership, vec![0, 0, 0, 0]);
assert_eq!(r.new_to_old, vec![42]);
assert_eq!(r.nb_clusters(), 1);
}
#[test]
fn gaps_are_compressed() {
let r = reindex_membership(&[5, 5, 10, 5, 0]).unwrap();
assert_eq!(r.membership, vec![0, 0, 1, 0, 2]);
assert_eq!(r.new_to_old, vec![5, 10, 0]);
}
#[test]
fn reverse_order_relabels_to_descending_in_new_to_old() {
let r = reindex_membership(&[2, 1, 0]).unwrap();
assert_eq!(r.membership, vec![0, 1, 2]);
assert_eq!(r.new_to_old, vec![2, 1, 0]);
}
#[test]
fn singletons() {
let r = reindex_membership(&[7, 8, 9, 10]).unwrap();
assert_eq!(r.membership, vec![0, 1, 2, 3]);
assert_eq!(r.new_to_old, vec![7, 8, 9, 10]);
}
#[test]
fn large_id_takes_btreemap_fallback() {
let r = reindex_membership(&[1_000_000, 7, 1_000_000, 7]).unwrap();
assert_eq!(r.membership, vec![0, 1, 0, 1]);
assert_eq!(r.new_to_old, vec![1_000_000, 7]);
}
#[test]
fn fast_path_max_id_equals_n_minus_1() {
let r = reindex_membership(&[3, 0, 1, 2]).unwrap();
assert_eq!(r.membership, vec![0, 1, 2, 3]);
assert_eq!(r.new_to_old, vec![3, 0, 1, 2]);
}
#[test]
fn fast_path_max_id_equals_n_takes_sparse_branch() {
let r = reindex_membership(&[4, 0, 1, 2]).unwrap();
assert_eq!(r.membership, vec![0, 1, 2, 3]);
assert_eq!(r.new_to_old, vec![4, 0, 1, 2]);
}
#[test]
fn preserves_partition_under_relabel() {
let inputs: &[&[u32]] = &[
&[0, 0, 1, 2, 1, 2, 0],
&[5, 5, 1_000, 7, 1_000, 7, 5],
&[u32::MAX, 0, u32::MAX, 1, 0],
];
for input in inputs {
let r = reindex_membership(input).unwrap();
for i in 0..input.len() {
for j in 0..input.len() {
assert_eq!(
input[i] == input[j],
r.membership[i] == r.membership[j],
"partition diverged at ({i}, {j}) for input {input:?}",
);
}
}
let k = r.nb_clusters();
for &c in &r.membership {
assert!(c < k);
}
for c in 0..k {
assert!(r.membership.contains(&c));
}
assert_eq!(r.new_to_old.len(), k as usize);
}
}
#[test]
fn fast_path_matches_sparse_path_for_in_range_ids() {
let input: Vec<u32> = vec![3, 0, 3, 1, 2, 0, 1];
let fast = reindex_membership(&input).unwrap();
let len_u32 = u32::try_from(input.len()).expect("test input length fits u32");
assert!(input.iter().copied().max().unwrap_or(0) < len_u32);
let mut expected_new_to_old: Vec<u32> = Vec::new();
let mut expected: Vec<u32> = Vec::with_capacity(input.len());
for &old in &input {
let pos = expected_new_to_old
.iter()
.position(|&x| x == old)
.unwrap_or_else(|| {
expected_new_to_old.push(old);
expected_new_to_old.len() - 1
});
expected.push(u32::try_from(pos).expect("test bookkeeping fits u32"));
}
assert_eq!(fast.membership, expected);
assert_eq!(fast.new_to_old, expected_new_to_old);
}
#[cfg(all(test, feature = "proptest-harness"))]
mod prop {
use super::*;
use proptest::prelude::*;
prop_compose! {
fn arb_membership()(
n in 1usize..=64,
seed in any::<u64>(),
max_id_kind in 0u32..3,
) -> Vec<u32> {
let mut rng: u64 = seed.wrapping_add(0xA5A5_5A5A_DEAD_BEEF);
let max_id: u32 = match max_id_kind {
0 => (n as u32).saturating_sub(1).max(1), 1 => (n as u32).saturating_mul(4).max(1), _ => 1_000_000, };
(0..n).map(|_| {
rng = rng.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1);
(rng >> 32) as u32 % (max_id + 1)
}).collect()
}
}
proptest! {
#![proptest_config(ProptestConfig { cases: 80, ..ProptestConfig::default() })]
#[test]
fn partition_preserved(input in arb_membership()) {
let r = reindex_membership(&input).unwrap();
prop_assert_eq!(r.membership.len(), input.len());
for i in 0..input.len() {
for j in (i + 1)..input.len() {
prop_assert_eq!(
input[i] == input[j],
r.membership[i] == r.membership[j],
);
}
}
}
#[test]
fn ids_are_contiguous(input in arb_membership()) {
let r = reindex_membership(&input).unwrap();
let k = r.nb_clusters();
for &c in &r.membership {
prop_assert!(c < k);
}
for c in 0..k {
prop_assert!(r.membership.contains(&c));
}
}
#[test]
fn new_to_old_round_trips(input in arb_membership()) {
let r = reindex_membership(&input).unwrap();
prop_assert_eq!(r.new_to_old.len(), r.nb_clusters() as usize);
for (i, &old) in input.iter().enumerate() {
let new = r.membership[i] as usize;
prop_assert_eq!(r.new_to_old[new], old);
}
}
#[test]
fn idempotent_when_already_dense(input in arb_membership()) {
let r1 = reindex_membership(&input).unwrap();
let r2 = reindex_membership(&r1.membership).unwrap();
prop_assert_eq!(r1.membership.clone(), r2.membership);
let expected_n2o: Vec<u32> = (0..r1.nb_clusters()).collect();
prop_assert_eq!(r2.new_to_old, expected_n2o);
}
}
}
}