use rand::rngs::StdRng;
use rand::seq::SliceRandom;
use rand::{Rng, SeedableRng};
use rustc_hash::FxHashSet;
use crate::error::{LeidenError, Result};
#[derive(Debug, Clone)]
pub struct LfrConfig {
pub n: usize,
pub tau1: f64,
pub tau2: f64,
pub mu: f64,
pub average_degree: Option<f64>,
pub min_degree: Option<usize>,
pub max_degree: Option<usize>,
pub min_community: Option<usize>,
pub max_community: Option<usize>,
pub seed: Option<u64>,
}
#[derive(Debug)]
pub struct LfrGraph {
pub edges: Vec<(usize, usize, f64)>,
pub node_count: usize,
pub ground_truth: Vec<usize>,
pub community_sizes: Vec<usize>,
}
pub fn generate_lfr_graph(config: LfrConfig) -> Result<LfrGraph> {
validate_config(&config)?;
let mut rng = match config.seed {
Some(seed) => StdRng::seed_from_u64(seed),
None => StdRng::from_rng(&mut rand::rng()),
};
let max_degree = config.max_degree.unwrap_or(config.n.saturating_sub(1));
let max_community = config.max_community.unwrap_or(config.n);
let min_degree = if let Some(min_deg) = config.min_degree {
min_deg
} else if let Some(avg_deg) = config.average_degree {
find_min_degree_for_average(config.tau1, avg_deg, max_degree)
} else {
5
};
let min_community = config.min_community.unwrap_or(std::cmp::max(min_degree, 4));
let degree_seq =
generate_degree_sequence(&mut rng, config.n, config.tau1, min_degree, max_degree);
let community_sizes = generate_community_sizes(
&mut rng,
config.n,
config.tau2,
min_community,
max_community,
);
let ground_truth = assign_communities(°ree_seq, &community_sizes, config.mu, &mut rng);
let edges = wire_edges(config.n, °ree_seq, &ground_truth, config.mu, &mut rng);
Ok(LfrGraph {
edges,
node_count: config.n,
ground_truth,
community_sizes,
})
}
fn validate_config(config: &LfrConfig) -> Result<()> {
if config.n == 0 {
return Err(LeidenError::InvalidParameter {
message: "n must be positive".to_string(),
});
}
if config.tau1 <= 1.0 {
return Err(LeidenError::InvalidParameter {
message: "tau1 must be > 1".to_string(),
});
}
if config.tau2 <= 1.0 {
return Err(LeidenError::InvalidParameter {
message: "tau2 must be > 1".to_string(),
});
}
if config.mu < 0.0 || config.mu > 1.0 {
return Err(LeidenError::InvalidParameter {
message: "mu must be in [0, 1]".to_string(),
});
}
Ok(())
}
fn powerlaw_sample(rng: &mut StdRng, exponent: f64, min_val: usize, max_val: usize) -> usize {
if min_val >= max_val {
return min_val;
}
loop {
let u: f64 = rng.random();
let x = (min_val as f64) * (1.0 - u).powf(1.0 / (1.0 - exponent));
let x = x.round() as usize;
if x >= min_val && x <= max_val {
return x;
}
}
}
fn find_min_degree_for_average(tau1: f64, target_avg: f64, max_degree: usize) -> usize {
let mut low = 1;
let mut high = max_degree;
for _ in 0..50 {
if low >= high {
break;
}
let mid = (low + high) / 2;
let mut test_rng = StdRng::seed_from_u64(42);
let mut sum = 0usize;
for _ in 0..1000 {
sum += powerlaw_sample(&mut test_rng, tau1, mid, max_degree);
}
let test_avg = sum as f64 / 1000.0;
if test_avg > target_avg {
high = mid;
} else {
low = mid + 1;
}
}
low.max(1)
}
fn generate_degree_sequence(
rng: &mut StdRng,
n: usize,
tau1: f64,
min_degree: usize,
max_degree: usize,
) -> Vec<usize> {
let mut degrees: Vec<usize> = (0..n)
.map(|_| powerlaw_sample(rng, tau1, min_degree, max_degree))
.collect();
let sum: usize = degrees.iter().sum();
if sum % 2 != 0 {
degrees[0] += 1;
}
degrees
}
fn generate_community_sizes(
rng: &mut StdRng,
n: usize,
tau2: f64,
min_comm: usize,
max_comm: usize,
) -> Vec<usize> {
let mut sizes = Vec::new();
let mut total = 0;
while total < n {
let remaining = n - total;
let effective_max = std::cmp::min(remaining, max_comm);
let effective_min = std::cmp::min(min_comm, remaining);
let s = powerlaw_sample(rng, tau2, effective_min, effective_max).min(remaining);
sizes.push(s);
total += s;
}
while sizes.len() > 1 && *sizes.last().unwrap() < min_comm {
let last = sizes.pop().unwrap();
*sizes.last_mut().unwrap() += last;
}
sizes
}
fn assign_communities(
degree_seq: &[usize],
community_sizes: &[usize],
mu: f64,
rng: &mut StdRng,
) -> Vec<usize> {
let n = degree_seq.len();
let num_comms = community_sizes.len();
let mut community_members: Vec<Vec<usize>> = vec![Vec::new(); num_comms];
let mut node_community: Vec<usize> = vec![0; n];
let mut unassigned: Vec<usize> = (0..n).collect();
unassigned.shuffle(rng);
let max_iters = n * 100;
for _ in 0..max_iters {
if unassigned.is_empty() {
break;
}
let node = unassigned.pop().unwrap();
let intra_degree = ((1.0 - mu) * degree_seq[node] as f64).round() as usize;
let mut assigned = false;
for _ in 0..num_comms {
let c = rng.random_range(0..num_comms);
if community_members[c].len() < community_sizes[c] && community_sizes[c] > intra_degree
{
community_members[c].push(node);
node_community[node] = c;
assigned = true;
break;
}
}
if !assigned {
for c in 0..num_comms {
if community_members[c].len() < community_sizes[c]
&& community_sizes[c] > intra_degree
{
community_members[c].push(node);
node_community[node] = c;
assigned = true;
break;
}
}
}
if !assigned {
let largest = community_sizes
.iter()
.enumerate()
.max_by_key(|&(_, s)| s)
.map(|(i, _)| i)
.unwrap_or(0);
community_members[largest].push(node);
node_community[node] = largest;
}
}
node_community
}
fn wire_edges(
n: usize,
degree_seq: &[usize],
communities: &[usize],
mu: f64,
rng: &mut StdRng,
) -> Vec<(usize, usize, f64)> {
let mut edges = Vec::new();
let mut adjacency: FxHashSet<(usize, usize)> = FxHashSet::default();
let num_comms = communities.iter().copied().max().unwrap_or(0) + 1;
let mut by_community: Vec<Vec<usize>> = vec![Vec::new(); num_comms];
for node in 0..n {
by_community[communities[node]].push(node);
}
for node in 0..n {
let total_degree = degree_seq[node];
let intra_degree = ((1.0 - mu) * total_degree as f64).round() as usize;
let inter_degree = total_degree.saturating_sub(intra_degree);
let comm = communities[node];
let same_community: Vec<usize> = by_community
.get(comm)
.map(|v| v.iter().filter(|&&v| v != node).copied().collect())
.unwrap_or_default();
let mut wired = 0;
for _ in 0..intra_degree * 10 {
if wired >= intra_degree || same_community.is_empty() {
break;
}
let v = same_community[rng.random_range(0..same_community.len())];
let key = if node < v { (node, v) } else { (v, node) };
if adjacency.insert(key) {
edges.push((node, v, 1.0));
wired += 1;
}
}
let diff_community: Vec<usize> = (0..n)
.filter(|&v| v != node && communities[v] != comm)
.collect();
let mut wired = 0;
for _ in 0..inter_degree * 10 {
if wired >= inter_degree || diff_community.is_empty() {
break;
}
let v = diff_community[rng.random_range(0..diff_community.len())];
let key = if node < v { (node, v) } else { (v, node) };
if adjacency.insert(key) {
edges.push((node, v, 1.0));
wired += 1;
}
}
}
edges
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashSet;
#[test]
fn test_lfr_basic() {
let config = LfrConfig {
n: 100,
tau1: 3.0,
tau2: 1.5,
mu: 0.1,
average_degree: Some(5.0),
min_degree: None,
max_degree: None,
min_community: Some(10),
max_community: None,
seed: Some(42),
};
let graph = generate_lfr_graph(config).unwrap();
assert_eq!(graph.node_count, 100);
assert!(!graph.edges.is_empty());
assert_eq!(graph.ground_truth.len(), 100);
let unique_comms: HashSet<usize> = graph.ground_truth.iter().copied().collect();
assert!(unique_comms.len() >= 2);
}
#[test]
fn test_lfr_with_leiden() {
use crate::{Leiden, LeidenConfig};
use gryf::core::marker::Undirected;
use gryf::Graph;
let config = LfrConfig {
n: 250,
tau1: 3.0,
tau2: 1.5,
mu: 0.1,
average_degree: Some(5.0),
min_degree: None,
max_degree: None,
min_community: Some(20),
max_community: None,
seed: Some(42),
};
let lfr = generate_lfr_graph(config).unwrap();
let mut graph: Graph<i32, f64, Undirected> = Graph::new_undirected();
let nodes: Vec<_> = (0..lfr.node_count)
.map(|i| graph.add_vertex(i as i32))
.collect();
for &(u, v, w) in &lfr.edges {
graph.add_edge(&nodes[u], &nodes[v], w);
}
let leiden_config = LeidenConfig {
seed: Some(42),
..Default::default()
};
let result = Leiden::new(leiden_config).run(&graph).unwrap();
assert!(
result.partition.num_communities() >= 2,
"expected at least 2 communities, got {}",
result.partition.num_communities()
);
}
#[test]
fn test_lfr_invalid_params() {
let config = LfrConfig {
n: 100,
tau1: 0.5,
tau2: 1.5,
mu: 0.1,
average_degree: Some(5.0),
min_degree: None,
max_degree: None,
min_community: None,
max_community: None,
seed: Some(42),
};
assert!(generate_lfr_graph(config).is_err());
}
#[test]
fn test_lfr_invalid_mu() {
let config = LfrConfig {
n: 100,
tau1: 3.0,
tau2: 1.5,
mu: 1.5,
average_degree: Some(5.0),
min_degree: None,
max_degree: None,
min_community: None,
max_community: None,
seed: Some(42),
};
assert!(generate_lfr_graph(config).is_err());
}
#[test]
fn test_lfr_zero_nodes() {
let config = LfrConfig {
n: 0,
tau1: 3.0,
tau2: 1.5,
mu: 0.1,
average_degree: None,
min_degree: Some(5),
max_degree: None,
min_community: None,
max_community: None,
seed: Some(42),
};
assert!(generate_lfr_graph(config).is_err());
}
#[test]
fn test_lfr_deterministic_with_seed() {
let config1 = LfrConfig {
n: 50,
tau1: 3.0,
tau2: 1.5,
mu: 0.1,
average_degree: Some(5.0),
min_degree: None,
max_degree: None,
min_community: Some(10),
max_community: None,
seed: Some(123),
};
let config2 = LfrConfig {
n: 50,
tau1: 3.0,
tau2: 1.5,
mu: 0.1,
average_degree: Some(5.0),
min_degree: None,
max_degree: None,
min_community: Some(10),
max_community: None,
seed: Some(123),
};
let g1 = generate_lfr_graph(config1).unwrap();
let g2 = generate_lfr_graph(config2).unwrap();
assert_eq!(g1.edges, g2.edges);
assert_eq!(g1.ground_truth, g2.ground_truth);
assert_eq!(g1.community_sizes, g2.community_sizes);
}
#[test]
fn test_lfr_community_sizes_sum_to_n() {
let config = LfrConfig {
n: 200,
tau1: 3.0,
tau2: 1.5,
mu: 0.1,
average_degree: Some(5.0),
min_degree: None,
max_degree: None,
min_community: Some(10),
max_community: None,
seed: Some(42),
};
let graph = generate_lfr_graph(config).unwrap();
let total: usize = graph.community_sizes.iter().sum();
assert_eq!(total, graph.node_count);
}
}