use crate::error::{LeidenError, Result};
use crate::graph::data::GraphData;
pub struct GraphDataBuilder {
node_count: usize,
directed: bool,
edges: Vec<(usize, usize, f64)>,
node_weights: Vec<f64>,
}
impl GraphDataBuilder {
#[must_use = "constructor returns a new instance"]
pub fn new(node_count: usize) -> Self {
Self {
node_count,
directed: false,
edges: Vec::new(),
node_weights: vec![1.0; node_count],
}
}
pub fn directed(mut self) -> Self {
self.directed = true;
self
}
pub fn add_edge(&mut self, src: usize, dst: usize, weight: f64) -> Result<&mut Self> {
if !(weight.is_finite() && weight >= 0.0) {
return Err(LeidenError::InvalidEdgeWeight { weight });
}
if src >= self.node_count || dst >= self.node_count {
return Err(LeidenError::InconsistentStructure {
message: format!(
"node ID {} exceeds node_count {}",
src.max(dst),
self.node_count
),
});
}
self.edges.push((src, dst, weight));
Ok(self)
}
pub fn set_node_weight(&mut self, node: usize, weight: f64) -> Result<&mut Self> {
if node >= self.node_count {
return Err(LeidenError::InconsistentStructure {
message: format!("node ID {} exceeds node_count {}", node, self.node_count),
});
}
self.node_weights[node] = weight;
Ok(self)
}
pub fn build(self) -> Result<GraphData> {
if self.directed {
build_directed_csr(self.node_count, self.edges, self.node_weights)
} else {
build_undirected_csr(self.node_count, self.edges, self.node_weights)
}
}
}
fn build_undirected_csr(
n: usize,
mut edges: Vec<(usize, usize, f64)>,
node_weights: Vec<f64>,
) -> Result<GraphData> {
edges.sort_by_key(|a| (a.0, a.1));
edges = {
let mut merged: Vec<(usize, usize, f64)> = Vec::with_capacity(edges.len());
for edge in edges {
if let Some(last) = merged.last_mut() {
if last.0 == edge.0 && last.1 == edge.1 {
last.2 += edge.2;
continue;
}
}
merged.push(edge);
}
merged
};
let mut neighbor_count: Vec<usize> = vec![0; n];
for &(u, v, _) in &edges {
neighbor_count[u] += 1;
if u != v {
neighbor_count[v] += 1;
}
}
let mut out_offsets: Vec<usize> = Vec::with_capacity(n + 1);
out_offsets.push(0);
let mut total = 0;
for &count in &neighbor_count {
total += count;
out_offsets.push(total);
}
let mut out_targets: Vec<usize> = vec![0; total];
let mut out_weights: Vec<f64> = vec![0.0; total];
let mut cursor: Vec<usize> = out_offsets[..n].to_vec();
for &(u, v, w) in &edges {
out_targets[cursor[u]] = v;
out_weights[cursor[u]] = w;
cursor[u] += 1;
if u != v {
out_targets[cursor[v]] = u;
out_weights[cursor[v]] = w;
cursor[v] += 1;
}
}
let degree: Vec<f64> = (0..n)
.map(|node| {
let start = out_offsets[node];
let end = out_offsets[node + 1];
let row_sum: f64 = out_weights[start..end].iter().sum();
let self_loop_sum: f64 = out_targets[start..end]
.iter()
.zip(out_weights[start..end].iter())
.filter(|&(&t, _)| t == node)
.map(|(_, &w)| w)
.sum();
row_sum + self_loop_sum
})
.collect();
validate_csr(
n,
&out_offsets,
&out_targets,
&out_weights,
&node_weights,
)?;
let total_weight = degree.iter().sum::<f64>() / 2.0;
let total_node_weight: f64 = node_weights.iter().sum();
Ok(GraphData {
n,
out_offsets,
out_targets,
out_weights,
total_weight,
total_node_weight,
out_degree: degree,
node_weight: node_weights,
directed: false,
in_offsets: Vec::new(),
in_targets: Vec::new(),
in_weights: Vec::new(),
in_degree: Vec::new(),
})
}
fn build_directed_csr(
n: usize,
mut edges: Vec<(usize, usize, f64)>,
node_weights: Vec<f64>,
) -> Result<GraphData> {
edges.sort_by_key(|a| (a.0, a.1));
edges = {
let mut merged: Vec<(usize, usize, f64)> = Vec::with_capacity(edges.len());
for edge in edges {
if let Some(last) = merged.last_mut() {
if last.0 == edge.0 && last.1 == edge.1 {
last.2 += edge.2;
continue;
}
}
merged.push(edge);
}
merged
};
let mut out_neighbor_count: Vec<usize> = vec![0; n];
for &(u, _v, _) in &edges {
out_neighbor_count[u] += 1;
}
let mut out_offsets: Vec<usize> = Vec::with_capacity(n + 1);
out_offsets.push(0);
let mut total = 0;
for &count in &out_neighbor_count {
total += count;
out_offsets.push(total);
}
let mut out_targets: Vec<usize> = vec![0; total];
let mut out_weights: Vec<f64> = vec![0.0; total];
let mut out_cursor: Vec<usize> = out_offsets[..n].to_vec();
for &(u, v, w) in &edges {
let idx = out_cursor[u];
out_targets[idx] = v;
out_weights[idx] = w;
out_cursor[u] += 1;
}
let mut in_neighbor_count: Vec<usize> = vec![0; n];
for &(_u, v, _) in &edges {
in_neighbor_count[v] += 1;
}
let mut in_offsets: Vec<usize> = Vec::with_capacity(n + 1);
in_offsets.push(0);
total = 0;
for &count in &in_neighbor_count {
total += count;
in_offsets.push(total);
}
let mut in_targets: Vec<usize> = vec![0; total];
let mut in_weights: Vec<f64> = vec![0.0; total];
let mut in_cursor: Vec<usize> = in_offsets[..n].to_vec();
for &(u, v, w) in &edges {
let idx = in_cursor[v];
in_targets[idx] = u;
in_weights[idx] = w;
in_cursor[v] += 1;
}
let out_degree: Vec<f64> = (0..n)
.map(|node| {
let start = out_offsets[node];
let end = out_offsets[node + 1];
out_weights[start..end].iter().sum()
})
.collect();
let in_degree: Vec<f64> = (0..n)
.map(|node| {
let start = in_offsets[node];
let end = in_offsets[node + 1];
in_weights[start..end].iter().sum()
})
.collect();
validate_csr(
n,
&out_offsets,
&out_targets,
&out_weights,
&node_weights,
)?;
validate_csr(
n,
&in_offsets,
&in_targets,
&in_weights,
&node_weights,
)?;
let total_weight: f64 = edges.iter().map(|&(_, _, w)| w).sum();
let total_node_weight: f64 = node_weights.iter().sum();
Ok(GraphData {
n,
out_offsets,
out_targets,
out_weights,
total_weight,
total_node_weight,
out_degree,
node_weight: node_weights,
directed: true,
in_offsets,
in_targets,
in_weights,
in_degree,
})
}
fn validate_csr(
n: usize,
offsets: &[usize],
targets: &[usize],
weights: &[f64],
node_weight: &[f64],
) -> Result<()> {
if offsets.len() != n + 1 {
return Err(LeidenError::InconsistentStructure {
message: format!("offsets length {} != n + 1 ({})", offsets.len(), n + 1),
});
}
if targets.len() != weights.len() {
return Err(LeidenError::InconsistentStructure {
message: format!(
"targets length {} != weights length {}",
targets.len(),
weights.len()
),
});
}
if node_weight.len() != n {
return Err(LeidenError::InconsistentStructure {
message: format!("node_weight length {} != n ({})", node_weight.len(), n),
});
}
if offsets[0] != 0 {
return Err(LeidenError::InconsistentStructure {
message: format!("offsets[0] must be 0, got {}", offsets[0]),
});
}
if offsets[n] != targets.len() {
return Err(LeidenError::InconsistentStructure {
message: format!(
"offsets[n] ({}) != targets.len() ({})",
offsets[n],
targets.len()
),
});
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_builder_triangle() {
let mut b = GraphDataBuilder::new(3);
b.add_edge(0, 1, 1.0).unwrap();
b.add_edge(1, 2, 2.0).unwrap();
b.add_edge(0, 2, 3.0).unwrap();
let gd = b.build().unwrap();
assert_eq!(gd.node_count(), 3);
assert!((gd.total_weight() - 6.0).abs() < 1e-10);
assert!((gd.degree_of(0) - 4.0).abs() < 1e-10);
assert!((gd.degree_of(1) - 3.0).abs() < 1e-10);
assert!((gd.degree_of(2) - 5.0).abs() < 1e-10);
}
#[test]
fn test_builder_self_loop() {
let mut b = GraphDataBuilder::new(2);
b.add_edge(0, 0, 5.0).unwrap();
b.add_edge(0, 1, 1.0).unwrap();
let gd = b.build().unwrap();
assert_eq!(gd.node_count(), 2);
assert!((gd.degree_of(0) - 11.0).abs() < 1e-10);
assert!((gd.degree_of(1) - 1.0).abs() < 1e-10);
assert!((gd.total_weight() - 6.0).abs() < 1e-10);
}
#[test]
fn test_builder_invalid_weight() {
let mut b = GraphDataBuilder::new(3);
assert!(b.add_edge(0, 1, f64::NAN).is_err());
assert!(b.add_edge(0, 1, f64::INFINITY).is_err());
assert!(b.add_edge(0, 1, -1.0).is_err());
}
#[test]
fn test_builder_node_out_of_range() {
let mut b = GraphDataBuilder::new(3);
assert!(b.add_edge(0, 5, 1.0).is_err());
assert!(b.add_edge(5, 0, 1.0).is_err());
}
#[test]
fn test_builder_set_node_weight() {
let mut b = GraphDataBuilder::new(3);
b.set_node_weight(1, 5.0).unwrap();
let gd = b.build().unwrap();
assert!((gd.node_weight(0) - 1.0).abs() < 1e-10);
assert!((gd.node_weight(1) - 5.0).abs() < 1e-10);
assert!((gd.node_weight(2) - 1.0).abs() < 1e-10);
}
#[test]
fn test_builder_directed_basic() {
let mut b = GraphDataBuilder::new(4).directed();
b.add_edge(0, 1, 1.0).unwrap();
b.add_edge(1, 2, 2.0).unwrap();
b.add_edge(2, 0, 3.0).unwrap();
b.add_edge(0, 3, 0.5).unwrap();
let gd = b.build().unwrap();
assert_eq!(gd.node_count(), 4);
assert!(gd.is_directed());
assert!((gd.total_weight() - 6.5).abs() < 1e-10);
assert!((gd.out_degree_of(0) - 1.5).abs() < 1e-10);
assert!((gd.out_degree_of(1) - 2.0).abs() < 1e-10);
assert!((gd.out_degree_of(2) - 3.0).abs() < 1e-10);
assert!((gd.out_degree_of(3) - 0.0).abs() < 1e-10);
assert!((gd.in_degree_of(0) - 3.0).abs() < 1e-10);
assert!((gd.in_degree_of(1) - 1.0).abs() < 1e-10);
assert!((gd.in_degree_of(2) - 2.0).abs() < 1e-10);
assert!((gd.in_degree_of(3) - 0.5).abs() < 1e-10);
assert!((gd.degree_of(0) - 4.5).abs() < 1e-10);
assert!((gd.degree_of(1) - 3.0).abs() < 1e-10);
}
#[test]
fn test_builder_directed_self_loop() {
let mut b = GraphDataBuilder::new(3).directed();
b.add_edge(0, 0, 5.0).unwrap();
b.add_edge(0, 1, 1.0).unwrap();
let gd = b.build().unwrap();
assert!((gd.out_degree_of(0) - 6.0).abs() < 1e-10);
assert!((gd.in_degree_of(0) - 5.0).abs() < 1e-10);
assert!((gd.in_degree_of(1) - 1.0).abs() < 1e-10);
assert!((gd.total_weight() - 6.0).abs() < 1e-10);
}
#[test]
fn test_builder_empty_graph() {
let gd = GraphDataBuilder::new(5).build().unwrap();
assert_eq!(gd.node_count(), 5);
assert!((gd.total_weight() - 0.0).abs() < 1e-10);
for i in 0..5 {
assert!((gd.degree_of(i) - 0.0).abs() < 1e-10);
assert_eq!(gd.neighbors(i).count(), 0);
}
}
#[test]
fn test_duplicate_edges_merged_in_build() {
let n = 3;
let mut b = GraphDataBuilder::new(n);
b.add_edge(0, 1, 1.0).unwrap();
b.add_edge(0, 1, 1.0).unwrap(); let g = b.build().unwrap();
assert!((g.total_weight() - 2.0).abs() < 1e-10);
let nbrs: Vec<_> = g.neighbors(0).collect();
assert_eq!(nbrs.len(), 1);
assert!((nbrs[0].1 - 2.0).abs() < 1e-10);
}
#[test]
fn test_duplicate_edges_sum_weights() {
let mut b = GraphDataBuilder::new(2);
b.add_edge(0, 1, 1.0).unwrap();
b.add_edge(0, 1, 2.0).unwrap();
let g = b.build().unwrap();
let nbrs: Vec<_> = g.neighbors(0).collect();
assert_eq!(nbrs.len(), 1);
assert!((nbrs[0].1 - 3.0).abs() < 1e-10);
}
#[test]
fn test_builder_matches_from_edgelist() {
let edges: Vec<(usize, usize, f64)> =
vec![(0, 1, 1.0), (1, 2, 2.0), (0, 2, 3.0), (2, 2, 0.5)];
let mut b = GraphDataBuilder::new(3);
for &(u, v, w) in &edges {
b.add_edge(u, v, w).unwrap();
}
let gd = b.build().unwrap();
let mut expected_degree = [0.0f64; 3];
for &(u, v, w) in &edges {
if u == v {
expected_degree[u] += 2.0 * w;
} else {
expected_degree[u] += w;
expected_degree[v] += w;
}
}
let expected_total: f64 = expected_degree.iter().sum::<f64>() / 2.0;
for (i, &expected) in expected_degree.iter().enumerate() {
assert!(
(gd.degree_of(i) - expected).abs() < 1e-10,
"degree mismatch at node {i}"
);
}
assert!(
(gd.total_weight() - expected_total).abs() < 1e-10,
"total_weight mismatch"
);
}
#[test]
fn test_large_graph_float_precision() {
let n = 2500;
let mut b = GraphDataBuilder::new(n);
for i in 0..n {
b.add_edge(i, (i + 1) % n, 1.0 / 3.0).unwrap();
b.add_edge(i, (i + 2) % n, 1.0 / 3.0).unwrap();
b.add_edge(i, (i + 3) % n, 1.0 / 3.0).unwrap();
}
let g = b.build().unwrap();
for i in 0..n {
let neighbors: Vec<_> = g.neighbors(i).collect();
let row_sum: f64 = neighbors.iter().map(|&(_, w)| w).sum();
let self_loop_sum: f64 = neighbors
.iter()
.filter(|&&(t, _)| t == i)
.map(|&(_, w)| w)
.sum();
assert!(
(g.degree_of(i) - (row_sum + self_loop_sum)).abs() < 1e-12,
"degree mismatch at node {i}: degree={}, row_sum+slf={}",
g.degree_of(i),
row_sum + self_loop_sum
);
}
}
#[test]
fn test_large_directed_float_precision() {
let n = 2500;
let mut b = GraphDataBuilder::new(n).directed();
for i in 0..n {
b.add_edge(i, (i + 1) % n, 1.0 / 3.0).unwrap();
b.add_edge(i, (i + 2) % n, 1.0 / 3.0).unwrap();
b.add_edge(i, (i + 3) % n, 1.0 / 3.0).unwrap();
}
let g = b.build().unwrap();
for i in 0..n {
let out_nbrs: Vec<_> = g.out_neighbors(i).collect();
let out_sum: f64 = out_nbrs.iter().map(|&(_, w)| w).sum();
assert!(
(g.out_degree_of(i) - out_sum).abs() < 1e-12,
"out_degree mismatch at node {i}"
);
let in_nbrs: Vec<_> = g.in_neighbors(i).collect();
let in_sum: f64 = in_nbrs.iter().map(|&(_, w)| w).sum();
assert!(
(g.in_degree_of(i) - in_sum).abs() < 1e-12,
"in_degree mismatch at node {i}"
);
}
}
}