use std::collections::HashMap;
use rand::prelude::*;
use super::distance::DistanceMetric;
use super::util::UnionFind;
use crate::error::{Error, Result};
/// A signed, weighted edge between two items.
///
/// A positive `weight` says the endpoints should share a cluster; a negative
/// `weight` says they should be separated. The magnitude is the penalty paid
/// when the final clustering violates the edge.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SignedEdge {
    /// Index of the first endpoint; must be `< n_items` when passed to `fit`.
    pub i: usize,
    /// Index of the second endpoint; must be `< n_items` when passed to `fit`.
    pub j: usize,
    /// Signed affinity; `abs(weight)` is the cost of violating this edge.
    pub weight: f32,
}
/// Output of a correlation-clustering run.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CorrelationResult {
    /// Cluster id per item, relabeled to the contiguous range `0..n_clusters`.
    pub labels: Vec<usize>,
    /// Number of distinct clusters appearing in `labels`.
    pub n_clusters: usize,
    /// Total disagreement weight: `|w|` summed over positive edges that are
    /// cut and negative edges that are kept inside a cluster.
    pub cost: f64,
}
/// Randomized pivot-based correlation clustering with merge and local-search
/// refinement. Configure via the builder-style `with_*` methods, then call
/// `fit`.
#[derive(Debug, Clone)]
pub struct CorrelationClustering {
    // Maximum number of local-search sweeps; 0 skips all refinement.
    max_iter: usize,
    // Optional RNG seed for a reproducible pivot order.
    seed: Option<u64>,
}
impl Default for CorrelationClustering {
    /// Equivalent to [`CorrelationClustering::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl CorrelationClustering {
    /// Creates a clusterer with defaults: up to 100 refinement sweeps and
    /// OS-provided randomness (no fixed seed).
    pub fn new() -> Self {
        Self {
            max_iter: 100,
            seed: None,
        }
    }

    /// Fixes the RNG seed so the random pivot order — and therefore the whole
    /// result — is reproducible.
    pub fn with_seed(mut self, seed: u64) -> Self {
        self.seed = Some(seed);
        self
    }

    /// Sets the refinement budget. `0` disables both the merge pass and the
    /// local search, returning the raw pivot solution.
    pub fn with_max_iter(mut self, max_iter: usize) -> Self {
        self.max_iter = max_iter;
        self
    }

    /// Partitions `n_items` items connected by signed `edges`.
    ///
    /// Pipeline: (1) Jaccard preclustering contracts obviously-together
    /// items; (2) a randomized pivot pass plus merge/local-search refinement
    /// produces a first candidate; (3) a second candidate is produced the
    /// same way on a graph whose currently-cut edges are up-weighted; (4) the
    /// cheaper candidate is expanded back to the original items and relabeled
    /// to a contiguous range.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidParameter` if any edge endpoint is `>= n_items`.
    pub fn fit(&self, n_items: usize, edges: &[SignedEdge]) -> Result<CorrelationResult> {
        if n_items == 0 {
            return Ok(CorrelationResult {
                labels: Vec::new(),
                n_clusters: 0,
                cost: 0.0,
            });
        }
        for edge in edges {
            if edge.i >= n_items || edge.j >= n_items {
                return Err(Error::InvalidParameter {
                    name: "edge index",
                    message: "exceeds n_items",
                });
            }
        }
        // Contract groups of items whose positive neighborhoods strongly
        // overlap; the solver then works on the smaller contracted graph.
        let adj = Self::build_adj(n_items, edges);
        let mapping = precluster(n_items, &adj, 0.5);
        let (n_contracted, contracted_edges) = contract_graph(n_items, edges, &mapping);
        let contracted_adj = Self::build_adj(n_contracted, &contracted_edges);
        // Candidate 1: pivot + refinement on the contracted graph.
        let mut labels1 = self.pivot(n_contracted, &contracted_adj);
        if self.max_iter > 0 {
            Self::merge_pass(n_contracted, &contracted_adj, &mut labels1);
            self.local_search(n_contracted, &contracted_adj, &mut labels1);
        }
        let cost1 = compute_cost(&labels1, &contracted_edges);
        // Candidate 2: double the weight of every edge the first solution
        // cut, amplifying that signal, and re-run pivot + refinement.
        let flipped_edges: Vec<SignedEdge> = contracted_edges
            .iter()
            .map(|e| {
                if labels1[e.i] != labels1[e.j] {
                    SignedEdge {
                        weight: e.weight * 2.0,
                        ..*e
                    }
                } else {
                    *e
                }
            })
            .collect();
        let adj2 = Self::build_adj(n_contracted, &flipped_edges);
        let mut labels2 = self.pivot(n_contracted, &adj2);
        if self.max_iter > 0 {
            Self::merge_pass(n_contracted, &adj2, &mut labels2);
            self.local_search(n_contracted, &adj2, &mut labels2);
        }
        // Both candidates are scored against the *original* contracted
        // weights, not the flipped ones.
        let cost2 = compute_cost(&labels2, &contracted_edges);
        let contracted_labels = if cost2 < cost1 { labels2 } else { labels1 };
        // Expand contracted labels back onto the original items.
        let mut labels = vec![0usize; n_items];
        for i in 0..n_items {
            labels[i] = contracted_labels[mapping[i]];
        }
        let cost = compute_cost(&labels, edges);
        let (labels, n_clusters) = relabel_contiguous(&labels);
        Ok(CorrelationResult {
            labels,
            n_clusters,
            cost,
        })
    }

    /// Builds an undirected adjacency list; each edge appears in both
    /// endpoints' neighbor lists.
    fn build_adj(n_items: usize, edges: &[SignedEdge]) -> Vec<Vec<(usize, f32)>> {
        let mut adj: Vec<Vec<(usize, f32)>> = vec![Vec::new(); n_items];
        for edge in edges {
            adj[edge.i].push((edge.j, edge.weight));
            adj[edge.j].push((edge.i, edge.weight));
        }
        adj
    }

    /// Converts pairwise distances into a complete signed-edge list with
    /// `weight = threshold - distance`, so pairs closer than `threshold`
    /// attract and farther pairs repel.
    pub fn edges_from_distances<D: DistanceMetric>(
        data: &[Vec<f32>],
        metric: &D,
        threshold: f32,
    ) -> Vec<SignedEdge> {
        let n = data.len();
        // saturating_sub guards n == 0: `n - 1` would underflow usize and
        // panic in debug builds when computing the capacity.
        let mut edges = Vec::with_capacity(n * n.saturating_sub(1) / 2);
        for i in 0..n {
            for j in (i + 1)..n {
                let dist = metric.distance(&data[i], &data[j]);
                let weight = threshold - dist;
                edges.push(SignedEdge { i, j, weight });
            }
        }
        edges
    }

    /// Randomized pivot pass: visit items in shuffled order; each unassigned
    /// item becomes the pivot of a new cluster and absorbs its still-unassigned
    /// positively-weighted neighbors.
    fn pivot(&self, n_items: usize, adj: &[Vec<(usize, f32)>]) -> Vec<usize> {
        let mut rng = match self.seed {
            Some(s) => StdRng::seed_from_u64(s),
            None => StdRng::from_os_rng(),
        };
        let mut assigned = vec![false; n_items];
        let mut labels = vec![0usize; n_items];
        let mut order: Vec<usize> = (0..n_items).collect();
        order.shuffle(&mut rng);
        let mut next_cluster = 0usize;
        for &pivot in &order {
            if assigned[pivot] {
                continue;
            }
            let cluster_id = next_cluster;
            next_cluster += 1;
            assigned[pivot] = true;
            labels[pivot] = cluster_id;
            for &(neighbor, weight) in &adj[pivot] {
                if !assigned[neighbor] && weight > 0.0 {
                    assigned[neighbor] = true;
                    labels[neighbor] = cluster_id;
                }
            }
        }
        labels
    }

    /// Repeatedly merges the single cluster pair whose union most reduces the
    /// objective, until no merge helps. The delta for merging two clusters is
    /// the negated sum of the edge weights crossing between them.
    fn merge_pass(n_items: usize, adj: &[Vec<(usize, f32)>], labels: &mut [usize]) {
        loop {
            // Accumulate, per unordered cluster pair, the cost delta of
            // merging that pair.
            let mut pair_deltas: HashMap<(usize, usize), f64> = HashMap::new();
            for i in 0..n_items {
                for &(j, w) in &adj[i] {
                    let (ca, cb) = (labels[i], labels[j]);
                    // Skip intra-cluster edges; count each edge once (i < j),
                    // since build_adj stores both directions.
                    if ca == cb || i > j {
                        continue;
                    }
                    let pair = (ca.min(cb), ca.max(cb));
                    *pair_deltas.entry(pair).or_insert(0.0) -= w as f64;
                }
            }
            let best = pair_deltas
                .iter()
                .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal));
            match best {
                // Apply only the best merge, then rescan from scratch so all
                // deltas stay exact.
                Some((&(from, to), &delta)) if delta < 0.0 => {
                    for label in labels[..n_items].iter_mut() {
                        if *label == from {
                            *label = to;
                        }
                    }
                }
                _ => break,
            }
        }
    }

    /// Greedy single-item moves: each sweep relocates items to the adjacent
    /// cluster (or a fresh singleton) that most reduces cost. Each item's best
    /// move is cached and recomputed only after the item or one of its
    /// neighbors changes cluster.
    fn local_search(&self, n_items: usize, adj: &[Vec<(usize, f32)>], labels: &mut [usize]) {
        // Next unused cluster id, offered as a "break out alone" target.
        let mut next_singleton = labels.iter().copied().max().unwrap_or(0) + 1;
        let mut best_move: Vec<(usize, f64)> = Vec::with_capacity(n_items);
        // stale[i] == true means best_move[i] must be recomputed.
        let mut stale = vec![true; n_items];
        for _ in 0..self.max_iter {
            let mut improved = false;
            if best_move.len() < n_items {
                best_move.resize(n_items, (0, 0.0));
            }
            for item in 0..n_items {
                if stale[item] {
                    let current = labels[item];
                    let mut best_target = current;
                    let mut best_delta = 0.0f64;
                    // Candidate: split off into a brand-new singleton cluster.
                    let singleton = next_singleton;
                    let delta_s = move_delta(item, current, singleton, adj, labels);
                    if delta_s < best_delta {
                        best_delta = delta_s;
                        best_target = singleton;
                    }
                    // Candidates: every cluster adjacent to this item.
                    for &(neighbor, _) in &adj[item] {
                        let c = labels[neighbor];
                        if c == current {
                            continue;
                        }
                        let d = move_delta(item, current, c, adj, labels);
                        if d < best_delta {
                            best_delta = d;
                            best_target = c;
                        }
                    }
                    best_move[item] = (best_target, best_delta);
                    stale[item] = false;
                }
                let (target, delta) = best_move[item];
                if delta < 0.0 && target != labels[item] {
                    labels[item] = target;
                    if target == next_singleton {
                        next_singleton += 1;
                    }
                    improved = true;
                    // Moving `item` invalidates its own and its neighbors'
                    // cached best moves.
                    stale[item] = true;
                    for &(neighbor, _) in &adj[item] {
                        stale[neighbor] = true;
                    }
                }
            }
            if !improved {
                break;
            }
        }
    }
}
/// Greedily unions positively-connected pairs whose positive neighborhoods
/// have Jaccard similarity at or above `threshold`, then returns a dense
/// item -> group index mapping (group ids are `0..k`, in first-seen order).
fn precluster(n_items: usize, adj: &[Vec<(usize, f32)>], threshold: f32) -> Vec<usize> {
    // Sorted positive-neighbor lists enable linear-time intersection counts.
    let pos_neighbors: Vec<Vec<usize>> = adj[..n_items]
        .iter()
        .map(|neighbors| {
            let mut pn: Vec<usize> = neighbors
                .iter()
                .filter(|&&(_, w)| w > 0.0)
                .map(|&(j, _)| j)
                .collect();
            pn.sort_unstable();
            pn
        })
        .collect();
    let mut uf = UnionFind::new(n_items);
    for u in 0..n_items {
        for &(v, w) in &adj[u] {
            // Visit each positive edge once (v > u) and only when the
            // endpoints are not already in the same group.
            if v <= u || w <= 0.0 || uf.find(u) == uf.find(v) {
                continue;
            }
            let shared = sorted_intersection_count(&pos_neighbors[u], &pos_neighbors[v]);
            let total = pos_neighbors[u].len() + pos_neighbors[v].len() - shared;
            if total > 0 && shared as f32 / total as f32 >= threshold {
                uf.union(u, v);
            }
        }
    }
    // Compress union-find roots into consecutive indices 0..k.
    let mut root_to_idx: HashMap<usize, usize> = HashMap::new();
    (0..n_items)
        .map(|i| {
            let root = uf.find(i);
            let fresh = root_to_idx.len();
            *root_to_idx.entry(root).or_insert(fresh)
        })
        .collect()
}
/// Counts elements common to two ascending-sorted slices via a two-pointer
/// walk (O(|a| + |b|)).
fn sorted_intersection_count(a: &[usize], b: &[usize]) -> usize {
    use std::cmp::Ordering;
    let mut rest_a = a;
    let mut rest_b = b;
    let mut shared = 0;
    while let (Some(&x), Some(&y)) = (rest_a.first(), rest_b.first()) {
        match x.cmp(&y) {
            Ordering::Less => rest_a = &rest_a[1..],
            Ordering::Greater => rest_b = &rest_b[1..],
            Ordering::Equal => {
                shared += 1;
                rest_a = &rest_a[1..];
                rest_b = &rest_b[1..];
            }
        }
    }
    shared
}
/// Collapses items into their preclustered groups, summing the weights of
/// parallel edges between groups and dropping intra-group edges.
///
/// Returns the contracted node count and the contracted edge list. When the
/// mapping is the identity, the original edges are returned unchanged.
fn contract_graph(
    n_items: usize,
    edges: &[SignedEdge],
    mapping: &[usize],
) -> (usize, Vec<SignedEdge>) {
    let n_contracted = mapping.iter().copied().max().map_or(0, |m| m + 1);
    if n_contracted == n_items {
        // Identity mapping (mapping ids are dense 0..n): nothing to contract.
        return (n_items, edges.to_vec());
    }
    let mut pair_weights: HashMap<(usize, usize), f32> = HashMap::new();
    for edge in edges {
        let ci = mapping[edge.i];
        let cj = mapping[edge.j];
        if ci == cj {
            // Intra-group edge: its cost is fixed by the contraction and can
            // be dropped from the reduced problem.
            continue;
        }
        let key = (ci.min(cj), ci.max(cj));
        *pair_weights.entry(key).or_insert(0.0) += edge.weight;
    }
    let mut contracted_edges: Vec<SignedEdge> = pair_weights
        .into_iter()
        .map(|((i, j), weight)| SignedEdge { i, j, weight })
        .collect();
    // HashMap iteration order is randomized per instance, which would make
    // the contracted edge order — and therefore adjacency order and seeded
    // pivot results — irreproducible across runs. Sort to restore determinism.
    contracted_edges.sort_unstable_by_key(|e| (e.i, e.j));
    (n_contracted, contracted_edges)
}
/// Cost change of moving `item` out of cluster `from` into cluster `to`:
/// edges kept inside `from` become cut (+w each), edges into `to` become
/// internal (-w each); edges to any other cluster are unaffected.
#[inline]
fn move_delta(
    item: usize,
    from: usize,
    to: usize,
    adj: &[Vec<(usize, f32)>],
    labels: &[usize],
) -> f64 {
    adj[item]
        .iter()
        .filter_map(|&(neighbor, weight)| {
            let cluster = labels[neighbor];
            if cluster == from {
                Some(f64::from(weight))
            } else if cluster == to {
                Some(-f64::from(weight))
            } else {
                None
            }
        })
        .sum()
}
/// Total disagreement weight of a labeling: sums `|w|` over positive edges
/// whose endpoints are separated and negative edges whose endpoints share a
/// cluster. Zero-weight edges never contribute.
fn compute_cost(labels: &[usize], edges: &[SignedEdge]) -> f64 {
    edges
        .iter()
        .filter(|edge| {
            let together = labels[edge.i] == labels[edge.j];
            (edge.weight > 0.0 && !together) || (edge.weight < 0.0 && together)
        })
        .map(|edge| f64::from(edge.weight).abs())
        .sum()
}
/// Remaps arbitrary cluster ids onto the contiguous range `0..k` in order of
/// first appearance, returning the new labels and the cluster count `k`.
fn relabel_contiguous(labels: &[usize]) -> (Vec<usize>, usize) {
    let mut seen: HashMap<usize, usize> = HashMap::new();
    let remapped: Vec<usize> = labels
        .iter()
        .map(|&old| {
            // `seen.len()` before insertion yields sequential ids 0, 1, 2, …
            let candidate = seen.len();
            *seen.entry(old).or_insert(candidate)
        })
        .collect();
    let n_clusters = seen.len();
    (remapped, n_clusters)
}
#[cfg(test)]
mod tests {
    use super::*;

    // Purely attractive graph: the optimum is one cluster with zero cost.
    #[test]
    fn all_positive_edges_single_cluster() {
        let edges = vec![
            SignedEdge { i: 0, j: 1, weight: 1.0, },
            SignedEdge { i: 0, j: 2, weight: 2.0, },
            SignedEdge { i: 1, j: 2, weight: 1.5, },
        ];
        let result = CorrelationClustering::new()
            .with_seed(42)
            .fit(3, &edges)
            .expect("fit should succeed");
        assert_eq!(result.n_clusters, 1);
        assert_eq!(result.labels[0], result.labels[1]);
        assert_eq!(result.labels[1], result.labels[2]);
        assert!((result.cost - 0.0).abs() < 1e-9);
    }

    // Purely repulsive graph: the optimum is all singletons with zero cost.
    #[test]
    fn all_negative_edges_singletons() {
        let edges = vec![
            SignedEdge { i: 0, j: 1, weight: -1.0, },
            SignedEdge { i: 0, j: 2, weight: -2.0, },
            SignedEdge { i: 1, j: 2, weight: -1.5, },
        ];
        let result = CorrelationClustering::new()
            .with_seed(42)
            .fit(3, &edges)
            .expect("fit should succeed");
        assert_eq!(result.n_clusters, 3);
        assert_ne!(result.labels[0], result.labels[1]);
        assert_ne!(result.labels[0], result.labels[2]);
        assert_ne!(result.labels[1], result.labels[2]);
        assert!((result.cost - 0.0).abs() < 1e-9);
    }

    // Two positive cliques joined only by negative edges must split cleanly.
    #[test]
    fn two_clear_groups() {
        let mut edges = Vec::new();
        let group_a = [0, 1, 2];
        let group_b = [3, 4, 5];
        for ii in 0..group_a.len() {
            for jj in (ii + 1)..group_a.len() {
                edges.push(SignedEdge { i: group_a[ii], j: group_a[jj], weight: 5.0, });
            }
        }
        for ii in 0..group_b.len() {
            for jj in (ii + 1)..group_b.len() {
                edges.push(SignedEdge { i: group_b[ii], j: group_b[jj], weight: 5.0, });
            }
        }
        for &a in &group_a {
            for &b in &group_b {
                edges.push(SignedEdge { i: a, j: b, weight: -3.0, });
            }
        }
        let result = CorrelationClustering::new()
            .with_seed(7)
            .fit(6, &edges)
            .expect("fit should succeed");
        assert_eq!(result.n_clusters, 2);
        assert_eq!(result.labels[0], result.labels[1]);
        assert_eq!(result.labels[1], result.labels[2]);
        assert_eq!(result.labels[3], result.labels[4]);
        assert_eq!(result.labels[4], result.labels[5]);
        assert_ne!(result.labels[0], result.labels[3]);
        assert!((result.cost - 0.0).abs() < 1e-9);
    }

    // Item 4 has no incident edges and must land in its own cluster.
    #[test]
    fn disconnected_items_are_singletons() {
        let edges = vec![
            SignedEdge { i: 0, j: 1, weight: 1.0, },
            SignedEdge { i: 2, j: 3, weight: 1.0, },
        ];
        let result = CorrelationClustering::new()
            .with_seed(42)
            .fit(5, &edges)
            .expect("fit should succeed");
        assert_eq!(result.labels[0], result.labels[1]);
        assert_eq!(result.labels[2], result.labels[3]);
        assert_ne!(result.labels[0], result.labels[2]);
        assert_ne!(result.labels[4], result.labels[0]);
        assert_ne!(result.labels[4], result.labels[2]);
        assert_eq!(result.n_clusters, 3);
    }

    // Two runs with the same seed must produce identical labels and cost.
    #[test]
    fn deterministic_with_seed() {
        let edges = vec![
            SignedEdge { i: 0, j: 1, weight: 1.0, },
            SignedEdge { i: 1, j: 2, weight: -0.5, },
            SignedEdge { i: 2, j: 3, weight: 1.0, },
            SignedEdge { i: 0, j: 3, weight: -0.5, },
            SignedEdge { i: 0, j: 2, weight: 0.3, },
            SignedEdge { i: 1, j: 3, weight: -0.2, },
        ];
        let r1 = CorrelationClustering::new()
            .with_seed(99)
            .fit(4, &edges)
            .expect("fit should succeed");
        let r2 = CorrelationClustering::new()
            .with_seed(99)
            .fit(4, &edges)
            .expect("fit should succeed");
        assert_eq!(
            r1.labels, r2.labels,
            "same seed should produce identical results"
        );
        assert!((r1.cost - r2.cost).abs() < 1e-12);
    }

    // No edges at all: each item gets its own distinct label.
    #[test]
    fn empty_edges_all_singletons() {
        let result = CorrelationClustering::new()
            .with_seed(42)
            .fit(4, &[])
            .expect("fit should succeed");
        assert_eq!(result.n_clusters, 4);
        assert_eq!(result.labels.len(), 4);
        let mut unique = result.labels.clone();
        unique.sort_unstable();
        unique.dedup();
        assert_eq!(unique.len(), 4);
        assert!((result.cost - 0.0).abs() < 1e-9);
    }

    // Degenerate single-item input.
    #[test]
    fn single_item() {
        let result = CorrelationClustering::new()
            .with_seed(42)
            .fit(1, &[])
            .expect("fit should succeed");
        assert_eq!(result.labels, vec![0]);
        assert_eq!(result.n_clusters, 1);
        assert!((result.cost - 0.0).abs() < 1e-9);
    }

    // Refinement (max_iter > 0) must never end up costlier than pivot alone.
    #[test]
    fn local_search_improves_cost() {
        let edges = vec![
            SignedEdge { i: 0, j: 1, weight: 5.0, },
            SignedEdge { i: 2, j: 3, weight: 5.0, },
            SignedEdge { i: 0, j: 2, weight: -3.0, },
            SignedEdge { i: 0, j: 3, weight: -3.0, },
            SignedEdge { i: 1, j: 2, weight: -3.0, },
            SignedEdge { i: 1, j: 3, weight: -3.0, },
        ];
        let pivot_only = CorrelationClustering::new()
            .with_max_iter(0)
            .with_seed(42)
            .fit(4, &edges)
            .expect("fit should succeed");
        let with_refinement = CorrelationClustering::new()
            .with_max_iter(100)
            .with_seed(42)
            .fit(4, &edges)
            .expect("fit should succeed");
        assert!(
            with_refinement.cost <= pivot_only.cost + 1e-9,
            "local search should not increase cost: refined={}, pivot={}",
            with_refinement.cost,
            pivot_only.cost
        );
    }

    // weight = threshold - distance: near pairs positive, far pairs negative.
    #[test]
    fn edges_from_distances_generates_correct_signs() {
        use super::super::distance::Euclidean;
        let data = vec![
            vec![0.0f32, 0.0],
            vec![0.1, 0.0],
            vec![10.0, 10.0],
        ];
        let edges = CorrelationClustering::edges_from_distances(&data, &Euclidean, 1.0);
        assert_eq!(edges.len(), 3);
        let e01 = edges
            .iter()
            .find(|e| (e.i == 0 && e.j == 1) || (e.i == 1 && e.j == 0))
            .expect("edge 0-1 should exist");
        assert!(e01.weight > 0.0, "close pair should have positive weight");
        let e02 = edges
            .iter()
            .find(|e| (e.i == 0 && e.j == 2) || (e.i == 2 && e.j == 0))
            .expect("edge 0-2 should exist");
        assert!(e02.weight < 0.0, "far pair should have negative weight");
        let e12 = edges
            .iter()
            .find(|e| (e.i == 1 && e.j == 2) || (e.i == 2 && e.j == 1))
            .expect("edge 1-2 should exist");
        assert!(e12.weight < 0.0, "far pair should have negative weight");
    }

    // Out-of-range endpoints must be rejected with InvalidParameter.
    #[test]
    fn invalid_edge_index_error() {
        let edges = vec![SignedEdge { i: 0, j: 5, weight: 1.0, }];
        let result = CorrelationClustering::new().fit(3, &edges);
        assert!(
            result.is_err(),
            "edge referencing item >= n_items should error"
        );
        if let Err(Error::InvalidParameter { name, .. }) = result {
            assert_eq!(name, "edge index");
        } else {
            panic!("expected InvalidParameter error");
        }
    }

    // Each K4 has identical positive neighborhoods internally, so Jaccard
    // preclustering should collapse each clique before the main solve.
    #[test]
    fn precluster_merges_obvious_cliques() {
        let mut edges = Vec::new();
        let clique_a = [0, 1, 2, 3];
        let clique_b = [4, 5, 6, 7];
        for ii in 0..clique_a.len() {
            for jj in (ii + 1)..clique_a.len() {
                edges.push(SignedEdge { i: clique_a[ii], j: clique_a[jj], weight: 5.0, });
            }
        }
        for ii in 0..clique_b.len() {
            for jj in (ii + 1)..clique_b.len() {
                edges.push(SignedEdge { i: clique_b[ii], j: clique_b[jj], weight: 5.0, });
            }
        }
        for &a in &clique_a {
            for &b in &clique_b {
                edges.push(SignedEdge { i: a, j: b, weight: -3.0, });
            }
        }
        let adj = CorrelationClustering::build_adj(8, &edges);
        let mapping = precluster(8, &adj, 0.5);
        assert_eq!(mapping[0], mapping[1], "K4 A should be preclustered");
        assert_eq!(mapping[1], mapping[2], "K4 A should be preclustered");
        assert_eq!(mapping[2], mapping[3], "K4 A should be preclustered");
        assert_eq!(mapping[4], mapping[5], "K4 B should be preclustered");
        assert_eq!(mapping[5], mapping[6], "K4 B should be preclustered");
        assert_eq!(mapping[6], mapping[7], "K4 B should be preclustered");
        assert_ne!(mapping[0], mapping[4], "the two K4s should be separate");
        let result = CorrelationClustering::new()
            .with_seed(42)
            .fit(8, &edges)
            .expect("fit should succeed");
        assert_eq!(result.n_clusters, 2);
        assert!((result.cost - 0.0).abs() < 1e-9);
    }

    // A sparse tree has disjoint neighborhoods, so nothing should merge.
    #[test]
    fn precluster_identity_when_no_overlap() {
        let edges = vec![
            SignedEdge { i: 0, j: 1, weight: 1.0, },
            SignedEdge { i: 0, j: 2, weight: 1.0, },
            SignedEdge { i: 1, j: 3, weight: 1.0, },
        ];
        let adj = CorrelationClustering::build_adj(4, &edges);
        let mapping = precluster(4, &adj, 0.5);
        let n_contracted = mapping.iter().copied().max().unwrap_or(0) + 1;
        assert_eq!(
            n_contracted, 4,
            "no pairs should be merged with disjoint neighborhoods"
        );
    }

    // n_items == 0 short-circuits to an empty result.
    #[test]
    fn zero_items_returns_empty() {
        let result = CorrelationClustering::new()
            .fit(0, &[])
            .expect("fit should succeed");
        assert!(result.labels.is_empty());
        assert_eq!(result.n_clusters, 0);
        assert!((result.cost - 0.0).abs() < 1e-9);
    }
}
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        // Every item must receive a label, even with no edges at all.
        #[test]
        fn labels_cover_all_items(n_items in 1usize..20) {
            let edges: Vec<SignedEdge> = Vec::new();
            let result = CorrelationClustering::new()
                .with_seed(42)
                .fit(n_items, &edges)
                .unwrap();
            prop_assert_eq!(result.labels.len(), n_items,
                "every item should get a label");
        }

        // Labels must form the contiguous range 0..n_clusters with no gaps,
        // for a deterministic mixed-sign complete graph.
        #[test]
        fn labels_are_contiguous(n_items in 2usize..10) {
            let mut edges = Vec::new();
            for i in 0..n_items {
                for j in (i+1)..n_items {
                    let w = ((i * 3 + j * 5) % 7) as f32 - 3.0;
                    edges.push(SignedEdge { i, j, weight: w });
                }
            }
            let result = CorrelationClustering::new()
                .with_seed(42)
                .fit(n_items, &edges)
                .unwrap();
            let max_label = result.labels.iter().copied().max().unwrap_or(0);
            prop_assert!(max_label < result.n_clusters,
                "max label {} should be < n_clusters {}", max_label, result.n_clusters);
            let mut seen = vec![false; result.n_clusters];
            for &l in &result.labels {
                seen[l] = true;
            }
            for (i, &s) in seen.iter().enumerate() {
                prop_assert!(s, "cluster label {} unused but n_clusters={}", i, result.n_clusters);
            }
        }

        // Same contiguity property for the edge-free (all singleton) case.
        #[test]
        fn labels_use_contiguous_range(n_items in 1usize..15) {
            let result = CorrelationClustering::new()
                .with_seed(42)
                .fit(n_items, &[])
                .unwrap();
            let max_label = result.labels.iter().copied().max().unwrap_or(0);
            prop_assert!(max_label < result.n_clusters,
                "max label {} should be < n_clusters {}", max_label, result.n_clusters);
            let mut seen = vec![false; result.n_clusters];
            for &l in &result.labels {
                seen[l] = true;
            }
            for (i, &s) in seen.iter().enumerate() {
                prop_assert!(s, "cluster label {} not used but n_clusters={}", i, result.n_clusters);
            }
        }

        // The reported cost must equal a direct recomputation from the labels.
        #[test]
        fn cost_matches_edges(n_items in 2usize..10) {
            let mut edges = Vec::new();
            for i in 0..n_items {
                for j in (i+1)..n_items {
                    let w = ((i * 7 + j * 13) % 20) as f32 - 10.0;
                    edges.push(SignedEdge { i, j, weight: w });
                }
            }
            let result = CorrelationClustering::new()
                .with_seed(42)
                .fit(n_items, &edges)
                .unwrap();
            let mut expected_cost = 0.0f64;
            for edge in &edges {
                let same = result.labels[edge.i] == result.labels[edge.j];
                if (edge.weight > 0.0 && !same) || (edge.weight < 0.0 && same) {
                    expected_cost += (edge.weight as f64).abs();
                }
            }
            prop_assert!(
                (result.cost - expected_cost).abs() < 1e-6,
                "reported cost {} != recomputed cost {}", result.cost, expected_cost
            );
        }

        // A complete positive graph must merge at least one pair.
        #[test]
        fn all_positive_fewer_clusters(n_items in 3usize..12) {
            let mut edges = Vec::new();
            for i in 0..n_items {
                for j in (i+1)..n_items {
                    edges.push(SignedEdge { i, j, weight: 2.0 });
                }
            }
            let result = CorrelationClustering::new()
                .with_seed(42)
                .fit(n_items, &edges)
                .unwrap();
            prop_assert!(result.n_clusters <= n_items,
                "n_clusters {} > n_items {}", result.n_clusters, n_items);
            prop_assert!(result.n_clusters < n_items,
                "strongly positive edges should merge at least one pair: \
                n_clusters={}, n_items={}", result.n_clusters, n_items);
        }

        // A Jaccard threshold of 2.0 is unattainable, so preclustering must
        // return the identity mapping (fit still succeeds regardless).
        #[test]
        fn precluster_cost_no_worse(n_items in 3usize..10) {
            let mut edges = Vec::new();
            for i in 0..n_items {
                for j in (i+1)..n_items {
                    let w = ((i * 3 + j * 5) % 7) as f32 - 3.0;
                    edges.push(SignedEdge { i, j, weight: w });
                }
            }
            let with_pre = CorrelationClustering::new()
                .with_seed(42)
                .fit(n_items, &edges)
                .unwrap();
            let adj = CorrelationClustering::build_adj(n_items, &edges);
            let mapping = precluster(n_items, &adj, 2.0);
            let n_contracted = mapping.iter().copied().max().map_or(0, |m| m + 1);
            prop_assert_eq!(n_contracted, n_items,
                "threshold=2.0 should yield identity mapping");
            let _ = with_pre.cost;
        }

        // A complete negative graph should stay mostly unmerged.
        #[test]
        fn all_negative_many_clusters(n_items in 3usize..12) {
            let mut edges = Vec::new();
            for i in 0..n_items {
                for j in (i+1)..n_items {
                    edges.push(SignedEdge { i, j, weight: -2.0 });
                }
            }
            let result = CorrelationClustering::new()
                .with_seed(42)
                .fit(n_items, &edges)
                .unwrap();
            let min_expected = n_items / 2;
            prop_assert!(result.n_clusters >= min_expected,
                "all-negative edges: n_clusters {} < floor(n/2)={} for n_items={}",
                result.n_clusters, min_expected, n_items);
        }
    }
}