Skip to main content

leiden_rs/graph/
builder.rs

1//! Builder for constructing [`GraphData`] from edges and node weights.
2//!
3//! [`GraphDataBuilder`] is the single entry-point for creating a [`GraphData`]
4//! instance. It accumulates edges and optional node weights, then builds the
5//! internal CSR structure on [`build`](GraphDataBuilder::build).
6//!
7//! # Example
8//!
9//! ```
10//! use leiden_rs::graph::GraphDataBuilder;
11//!
12//! let mut b = GraphDataBuilder::new(4);
13//! b.add_edge(0, 1, 1.0).unwrap();
14//! b.add_edge(1, 2, 2.0).unwrap();
15//! b.add_edge(2, 3, 1.5).unwrap();
16//! let graph = b.build().unwrap();
17//!
18//! assert_eq!(graph.node_count(), 4);
19//! ```
20
21use crate::error::{LeidenError, Result};
22use crate::graph::data::GraphData;
23
24/// Builder that accumulates edges and node weights, then produces a [`GraphData`].
25///
26/// This is the **only** way to construct a [`GraphData`] from raw edges. The
27/// builder validates inputs incrementally (on each [`add_edge`] call) and then
28/// builds the CSR layout in [`build`].
29///
30/// [`add_edge`]: GraphDataBuilder::add_edge
31/// [`build`]: GraphDataBuilder::build
32pub struct GraphDataBuilder {
33    node_count: usize,
34    directed: bool,
35    edges: Vec<(usize, usize, f64)>,
36    node_weights: Vec<f64>,
37}
38
39impl GraphDataBuilder {
40    /// Create a new builder for a graph with `node_count` nodes.
41    ///
42    /// All node weights default to `1.0`, `directed` defaults to `false`,
43    /// and the edge list starts empty.
44    #[must_use = "constructor returns a new instance"]
45    pub fn new(node_count: usize) -> Self {
46        Self {
47            node_count,
48            directed: false,
49            edges: Vec::new(),
50            node_weights: vec![1.0; node_count],
51        }
52    }
53
54    /// Set the graph to directed mode.
55    ///
56    pub fn directed(mut self) -> Self {
57        self.directed = true;
58        self
59    }
60
61    /// Add a weighted edge `(src, dst, weight)`.
62    ///
63    /// Returns `Err(LeidenError::InconsistentStructure)` if `src` or `dst` is
64    /// out of range, or `Err(LeidenError::InvalidEdgeWeight)` if the weight is
65    /// not finite and non-negative.
66    pub fn add_edge(&mut self, src: usize, dst: usize, weight: f64) -> Result<&mut Self> {
67        if !(weight.is_finite() && weight >= 0.0) {
68            return Err(LeidenError::InvalidEdgeWeight { weight });
69        }
70        if src >= self.node_count || dst >= self.node_count {
71            return Err(LeidenError::InconsistentStructure {
72                message: format!(
73                    "node ID {} exceeds node_count {}",
74                    src.max(dst),
75                    self.node_count
76                ),
77            });
78        }
79        self.edges.push((src, dst, weight));
80        Ok(self)
81    }
82
83    /// Override the weight for a single node.
84    ///
85    /// Returns `Err(LeidenError::InconsistentStructure)` if `node` is out of
86    /// range.
87    pub fn set_node_weight(&mut self, node: usize, weight: f64) -> Result<&mut Self> {
88        if node >= self.node_count {
89            return Err(LeidenError::InconsistentStructure {
90                message: format!("node ID {} exceeds node_count {}", node, self.node_count),
91            });
92        }
93        self.node_weights[node] = weight;
94        Ok(self)
95    }
96
97    /// Consume the builder and produce a [`GraphData`].
98    ///
99    /// Delegates to the appropriate CSR constructor based on the `directed`
100    /// flag. In Phase 1 only undirected construction is supported.
101    pub fn build(self) -> Result<GraphData> {
102        if self.directed {
103            build_directed_csr(self.node_count, self.edges, self.node_weights)
104        } else {
105            build_undirected_csr(self.node_count, self.edges, self.node_weights)
106        }
107    }
108}
109
110/// Build an undirected [`GraphData`] from an edge list.
111///
112/// Produces exactly the same CSR as the original `GraphData::from_edgelist`:
113///
114/// * Each edge `(u, v, w)` with `u != v` is stored twice — once in the
115///   adjacency of `u` and once in `v`. Self-loops `(u, u, w)` are stored
116///   once but contribute `2·w` to the degree.
117/// * `total_weight = degree.sum() / 2`
118/// * `in_*` fields are empty, `directed` is `false`.
119fn build_undirected_csr(
120    n: usize,
121    mut edges: Vec<(usize, usize, f64)>,
122    node_weights: Vec<f64>,
123) -> Result<GraphData> {
124    edges.sort_by_key(|a| (a.0, a.1));
125    // Merge consecutive duplicate edges by summing weights
126    edges = {
127        let mut merged: Vec<(usize, usize, f64)> = Vec::with_capacity(edges.len());
128        for edge in edges {
129            if let Some(last) = merged.last_mut() {
130                if last.0 == edge.0 && last.1 == edge.1 {
131                    last.2 += edge.2;
132                    continue;
133                }
134            }
135            merged.push(edge);
136        }
137        merged
138    };
139    let mut neighbor_count: Vec<usize> = vec![0; n];
140    for &(u, v, _) in &edges {
141        neighbor_count[u] += 1;
142        if u != v {
143            neighbor_count[v] += 1;
144        }
145    }
146
147    let mut out_offsets: Vec<usize> = Vec::with_capacity(n + 1);
148    out_offsets.push(0);
149    let mut total = 0;
150    for &count in &neighbor_count {
151        total += count;
152        out_offsets.push(total);
153    }
154
155    let mut out_targets: Vec<usize> = vec![0; total];
156    let mut out_weights: Vec<f64> = vec![0.0; total];
157    let mut cursor: Vec<usize> = out_offsets[..n].to_vec();
158
159    for &(u, v, w) in &edges {
160        out_targets[cursor[u]] = v;
161        out_weights[cursor[u]] = w;
162        cursor[u] += 1;
163        if u != v {
164            out_targets[cursor[v]] = u;
165            out_weights[cursor[v]] = w;
166            cursor[v] += 1;
167        }
168    }
169
170    // Derive degree from CSR weights (single source of truth).
171    // Self-loops are stored once in the CSR but contribute 2×w to degree.
172    let degree: Vec<f64> = (0..n)
173        .map(|node| {
174            let start = out_offsets[node];
175            let end = out_offsets[node + 1];
176            let row_sum: f64 = out_weights[start..end].iter().sum();
177            let self_loop_sum: f64 = out_targets[start..end]
178                .iter()
179                .zip(out_weights[start..end].iter())
180                .filter(|&(&t, _)| t == node)
181                .map(|(_, &w)| w)
182                .sum();
183            row_sum + self_loop_sum
184        })
185        .collect();
186
187    validate_csr(
188        n,
189        &out_offsets,
190        &out_targets,
191        &out_weights,
192        &node_weights,
193    )?;
194
195    let total_weight = degree.iter().sum::<f64>() / 2.0;
196    let total_node_weight: f64 = node_weights.iter().sum();
197
198    Ok(GraphData {
199        n,
200        out_offsets,
201        out_targets,
202        out_weights,
203        total_weight,
204        total_node_weight,
205        out_degree: degree,
206        node_weight: node_weights,
207        directed: false,
208        in_offsets: Vec::new(),
209        in_targets: Vec::new(),
210        in_weights: Vec::new(),
211        in_degree: Vec::new(),
212    })
213}
214
215/// Build a directed [`GraphData`] from an edge list.
216///
217/// Each edge `(u, v, w)` is stored once in the out-edge CSR of `u` and once
218/// in the in-edge CSR of `v`. Self-loops `(u, u, w)` are stored once in each CSR
219/// and contribute `w` to both out-degree and in-degree.
220///
221/// `total_weight = sum of all edge weights` (each edge counted once).
222fn build_directed_csr(
223    n: usize,
224    mut edges: Vec<(usize, usize, f64)>,
225    node_weights: Vec<f64>,
226) -> Result<GraphData> {
227    edges.sort_by_key(|a| (a.0, a.1));
228    // Merge consecutive duplicate edges by summing weights
229    edges = {
230        let mut merged: Vec<(usize, usize, f64)> = Vec::with_capacity(edges.len());
231        for edge in edges {
232            if let Some(last) = merged.last_mut() {
233                if last.0 == edge.0 && last.1 == edge.1 {
234                    last.2 += edge.2;
235                    continue;
236                }
237            }
238            merged.push(edge);
239        }
240        merged
241    };
242    // ── Out-edge CSR ──
243    let mut out_neighbor_count: Vec<usize> = vec![0; n];
244    for &(u, _v, _) in &edges {
245        out_neighbor_count[u] += 1;
246    }
247
248    let mut out_offsets: Vec<usize> = Vec::with_capacity(n + 1);
249    out_offsets.push(0);
250    let mut total = 0;
251    for &count in &out_neighbor_count {
252        total += count;
253        out_offsets.push(total);
254    }
255
256    let mut out_targets: Vec<usize> = vec![0; total];
257    let mut out_weights: Vec<f64> = vec![0.0; total];
258    let mut out_cursor: Vec<usize> = out_offsets[..n].to_vec();
259
260    for &(u, v, w) in &edges {
261        let idx = out_cursor[u];
262        out_targets[idx] = v;
263        out_weights[idx] = w;
264        out_cursor[u] += 1;
265    }
266
267    // ── In-edge CSR ──
268    let mut in_neighbor_count: Vec<usize> = vec![0; n];
269    for &(_u, v, _) in &edges {
270        in_neighbor_count[v] += 1;
271    }
272
273    let mut in_offsets: Vec<usize> = Vec::with_capacity(n + 1);
274    in_offsets.push(0);
275    total = 0;
276    for &count in &in_neighbor_count {
277        total += count;
278        in_offsets.push(total);
279    }
280
281    let mut in_targets: Vec<usize> = vec![0; total];
282    let mut in_weights: Vec<f64> = vec![0.0; total];
283    let mut in_cursor: Vec<usize> = in_offsets[..n].to_vec();
284
285    for &(u, v, w) in &edges {
286        let idx = in_cursor[v];
287        in_targets[idx] = u;
288        in_weights[idx] = w;
289        in_cursor[v] += 1;
290    }
291
292    // Derive out_degree and in_degree from CSR weights (single source of truth).
293    let out_degree: Vec<f64> = (0..n)
294        .map(|node| {
295            let start = out_offsets[node];
296            let end = out_offsets[node + 1];
297            out_weights[start..end].iter().sum()
298        })
299        .collect();
300    let in_degree: Vec<f64> = (0..n)
301        .map(|node| {
302            let start = in_offsets[node];
303            let end = in_offsets[node + 1];
304            in_weights[start..end].iter().sum()
305        })
306        .collect();
307
308    validate_csr(
309        n,
310        &out_offsets,
311        &out_targets,
312        &out_weights,
313        &node_weights,
314    )?;
315    validate_csr(
316        n,
317        &in_offsets,
318        &in_targets,
319        &in_weights,
320        &node_weights,
321    )?;
322
323    let total_weight: f64 = edges.iter().map(|&(_, _, w)| w).sum();
324    let total_node_weight: f64 = node_weights.iter().sum();
325
326    Ok(GraphData {
327        n,
328        out_offsets,
329        out_targets,
330        out_weights,
331        total_weight,
332        total_node_weight,
333        out_degree,
334        node_weight: node_weights,
335        directed: true,
336        in_offsets,
337        in_targets,
338        in_weights,
339        in_degree,
340    })
341}
342
343/// Validate the structural invariants of a CSR representation.
344///
345/// Checks:
346///
347/// * `offsets.len() == n + 1`
348/// * `targets.len() == weights.len()`
349/// * `node_weight.len() == n`
350/// * `offsets[0] == 0`
351/// * `offsets[n] == targets.len()`
352///
353/// All failures produce [`LeidenError::InconsistentStructure`].
354fn validate_csr(
355    n: usize,
356    offsets: &[usize],
357    targets: &[usize],
358    weights: &[f64],
359    node_weight: &[f64],
360) -> Result<()> {
361    if offsets.len() != n + 1 {
362        return Err(LeidenError::InconsistentStructure {
363            message: format!("offsets length {} != n + 1 ({})", offsets.len(), n + 1),
364        });
365    }
366    if targets.len() != weights.len() {
367        return Err(LeidenError::InconsistentStructure {
368            message: format!(
369                "targets length {} != weights length {}",
370                targets.len(),
371                weights.len()
372            ),
373        });
374    }
375    if node_weight.len() != n {
376        return Err(LeidenError::InconsistentStructure {
377            message: format!("node_weight length {} != n ({})", node_weight.len(), n),
378        });
379    }
380    if offsets[0] != 0 {
381        return Err(LeidenError::InconsistentStructure {
382            message: format!("offsets[0] must be 0, got {}", offsets[0]),
383        });
384    }
385    if offsets[n] != targets.len() {
386        return Err(LeidenError::InconsistentStructure {
387            message: format!(
388                "offsets[n] ({}) != targets.len() ({})",
389                offsets[n],
390                targets.len()
391            ),
392        });
393    }
394    Ok(())
395}
396
397#[cfg(test)]
398mod tests {
399    use super::*;
400
401    #[test]
402    fn test_builder_triangle() {
403        let mut b = GraphDataBuilder::new(3);
404        b.add_edge(0, 1, 1.0).unwrap();
405        b.add_edge(1, 2, 2.0).unwrap();
406        b.add_edge(0, 2, 3.0).unwrap();
407        let gd = b.build().unwrap();
408
409        assert_eq!(gd.node_count(), 3);
410        // total_weight = (degree[0] + degree[1] + degree[2]) / 2
411        // degree[0] = 1 + 3 = 4, degree[1] = 1 + 2 = 3, degree[2] = 2 + 3 = 5
412        // total_weight = 12 / 2 = 6
413        assert!((gd.total_weight() - 6.0).abs() < 1e-10);
414        assert!((gd.degree_of(0) - 4.0).abs() < 1e-10);
415        assert!((gd.degree_of(1) - 3.0).abs() < 1e-10);
416        assert!((gd.degree_of(2) - 5.0).abs() < 1e-10);
417    }
418
419    #[test]
420    fn test_builder_self_loop() {
421        let mut b = GraphDataBuilder::new(2);
422        b.add_edge(0, 0, 5.0).unwrap();
423        b.add_edge(0, 1, 1.0).unwrap();
424        let gd = b.build().unwrap();
425
426        // degree[0] = 2*5 + 1 = 11, degree[1] = 1, total_weight = 12 / 2 = 6
427        assert_eq!(gd.node_count(), 2);
428        assert!((gd.degree_of(0) - 11.0).abs() < 1e-10);
429        assert!((gd.degree_of(1) - 1.0).abs() < 1e-10);
430        assert!((gd.total_weight() - 6.0).abs() < 1e-10);
431    }
432
433    #[test]
434    fn test_builder_invalid_weight() {
435        let mut b = GraphDataBuilder::new(3);
436        assert!(b.add_edge(0, 1, f64::NAN).is_err());
437        assert!(b.add_edge(0, 1, f64::INFINITY).is_err());
438        assert!(b.add_edge(0, 1, -1.0).is_err());
439    }
440
441    #[test]
442    fn test_builder_node_out_of_range() {
443        let mut b = GraphDataBuilder::new(3);
444        assert!(b.add_edge(0, 5, 1.0).is_err());
445        assert!(b.add_edge(5, 0, 1.0).is_err());
446    }
447
448    #[test]
449    fn test_builder_set_node_weight() {
450        let mut b = GraphDataBuilder::new(3);
451        b.set_node_weight(1, 5.0).unwrap();
452        let gd = b.build().unwrap();
453        assert!((gd.node_weight(0) - 1.0).abs() < 1e-10);
454        assert!((gd.node_weight(1) - 5.0).abs() < 1e-10);
455        assert!((gd.node_weight(2) - 1.0).abs() < 1e-10);
456    }
457
458    #[test]
459    fn test_builder_directed_basic() {
460        let mut b = GraphDataBuilder::new(4).directed();
461        b.add_edge(0, 1, 1.0).unwrap();
462        b.add_edge(1, 2, 2.0).unwrap();
463        b.add_edge(2, 0, 3.0).unwrap();
464        b.add_edge(0, 3, 0.5).unwrap();
465        let gd = b.build().unwrap();
466
467        assert_eq!(gd.node_count(), 4);
468        assert!(gd.is_directed());
469        // total_weight = 1.0 + 2.0 + 3.0 + 0.5 = 6.5
470        assert!((gd.total_weight() - 6.5).abs() < 1e-10);
471
472        // out_degree: 0→1+0.5=1.5, 1→2=2, 2→3=3, 3→0=0
473        assert!((gd.out_degree_of(0) - 1.5).abs() < 1e-10);
474        assert!((gd.out_degree_of(1) - 2.0).abs() < 1e-10);
475        assert!((gd.out_degree_of(2) - 3.0).abs() < 1e-10);
476        assert!((gd.out_degree_of(3) - 0.0).abs() < 1e-10);
477
478        // in_degree: 0→3=3, 1→1=1, 2→2=2, 3→0.5=0.5
479        assert!((gd.in_degree_of(0) - 3.0).abs() < 1e-10);
480        assert!((gd.in_degree_of(1) - 1.0).abs() < 1e-10);
481        assert!((gd.in_degree_of(2) - 2.0).abs() < 1e-10);
482        assert!((gd.in_degree_of(3) - 0.5).abs() < 1e-10);
483
484        // degree_of for directed = out + in
485        assert!((gd.degree_of(0) - 4.5).abs() < 1e-10);
486        assert!((gd.degree_of(1) - 3.0).abs() < 1e-10);
487    }
488
489    #[test]
490    fn test_builder_directed_self_loop() {
491        let mut b = GraphDataBuilder::new(3).directed();
492        b.add_edge(0, 0, 5.0).unwrap();
493        b.add_edge(0, 1, 1.0).unwrap();
494        let gd = b.build().unwrap();
495
496        // out_degree: 0→5+1=6, 1→0, 2→0
497        assert!((gd.out_degree_of(0) - 6.0).abs() < 1e-10);
498        // in_degree: 0→5, 1→1, 2→0
499        assert!((gd.in_degree_of(0) - 5.0).abs() < 1e-10);
500        assert!((gd.in_degree_of(1) - 1.0).abs() < 1e-10);
501        // total_weight = 5.0 + 1.0 = 6.0
502        assert!((gd.total_weight() - 6.0).abs() < 1e-10);
503    }
504
505    #[test]
506    fn test_builder_empty_graph() {
507        let gd = GraphDataBuilder::new(5).build().unwrap();
508        assert_eq!(gd.node_count(), 5);
509        assert!((gd.total_weight() - 0.0).abs() < 1e-10);
510        for i in 0..5 {
511            assert!((gd.degree_of(i) - 0.0).abs() < 1e-10);
512            assert_eq!(gd.neighbors(i).count(), 0);
513        }
514    }
515
516    #[test]
517    fn test_duplicate_edges_merged_in_build() {
518        let n = 3;
519        let mut b = GraphDataBuilder::new(n);
520        b.add_edge(0, 1, 1.0).unwrap();
521        b.add_edge(0, 1, 1.0).unwrap(); // duplicate — merged by summing
522        let g = b.build().unwrap();
523        // After merge: single edge (0,1,2.0), undirected: degree[0]=2, degree[1]=2, total=4/2=2
524        assert!((g.total_weight() - 2.0).abs() < 1e-10);
525        let nbrs: Vec<_> = g.neighbors(0).collect();
526        assert_eq!(nbrs.len(), 1);
527        assert!((nbrs[0].1 - 2.0).abs() < 1e-10);
528    }
529
530    #[test]
531    fn test_duplicate_edges_sum_weights() {
532        let mut b = GraphDataBuilder::new(2);
533        b.add_edge(0, 1, 1.0).unwrap();
534        b.add_edge(0, 1, 2.0).unwrap();
535        let g = b.build().unwrap();
536        let nbrs: Vec<_> = g.neighbors(0).collect();
537        assert_eq!(nbrs.len(), 1);
538        assert!((nbrs[0].1 - 3.0).abs() < 1e-10);
539    }
540
541    #[test]
542    fn test_builder_matches_from_edgelist() {
543        let edges: Vec<(usize, usize, f64)> =
544            vec![(0, 1, 1.0), (1, 2, 2.0), (0, 2, 3.0), (2, 2, 0.5)];
545
546        let mut b = GraphDataBuilder::new(3);
547        for &(u, v, w) in &edges {
548            b.add_edge(u, v, w).unwrap();
549        }
550        let gd = b.build().unwrap();
551
552        let mut expected_degree = [0.0f64; 3];
553        for &(u, v, w) in &edges {
554            if u == v {
555                expected_degree[u] += 2.0 * w;
556            } else {
557                expected_degree[u] += w;
558                expected_degree[v] += w;
559            }
560        }
561        let expected_total: f64 = expected_degree.iter().sum::<f64>() / 2.0;
562
563        for (i, &expected) in expected_degree.iter().enumerate() {
564            assert!(
565                (gd.degree_of(i) - expected).abs() < 1e-10,
566                "degree mismatch at node {i}"
567            );
568        }
569        assert!(
570            (gd.total_weight() - expected_total).abs() < 1e-10,
571            "total_weight mismatch"
572        );
573    }
574
575    #[test]
576    fn test_large_graph_float_precision() {
577        let n = 2500;
578        let mut b = GraphDataBuilder::new(n);
579        for i in 0..n {
580            b.add_edge(i, (i + 1) % n, 1.0 / 3.0).unwrap();
581            b.add_edge(i, (i + 2) % n, 1.0 / 3.0).unwrap();
582            b.add_edge(i, (i + 3) % n, 1.0 / 3.0).unwrap();
583        }
584        // Must not fail with InconsistentStructure
585        let g = b.build().unwrap();
586        for i in 0..n {
587            let neighbors: Vec<_> = g.neighbors(i).collect();
588            let row_sum: f64 = neighbors.iter().map(|&(_, w)| w).sum();
589            let self_loop_sum: f64 = neighbors
590                .iter()
591                .filter(|&&(t, _)| t == i)
592                .map(|&(_, w)| w)
593                .sum();
594            assert!(
595                (g.degree_of(i) - (row_sum + self_loop_sum)).abs() < 1e-12,
596                "degree mismatch at node {i}: degree={}, row_sum+slf={}",
597                g.degree_of(i),
598                row_sum + self_loop_sum
599            );
600        }
601    }
602
603    #[test]
604    fn test_large_directed_float_precision() {
605        let n = 2500;
606        let mut b = GraphDataBuilder::new(n).directed();
607        for i in 0..n {
608            b.add_edge(i, (i + 1) % n, 1.0 / 3.0).unwrap();
609            b.add_edge(i, (i + 2) % n, 1.0 / 3.0).unwrap();
610            b.add_edge(i, (i + 3) % n, 1.0 / 3.0).unwrap();
611        }
612        let g = b.build().unwrap();
613        for i in 0..n {
614            let out_nbrs: Vec<_> = g.out_neighbors(i).collect();
615            let out_sum: f64 = out_nbrs.iter().map(|&(_, w)| w).sum();
616            assert!(
617                (g.out_degree_of(i) - out_sum).abs() < 1e-12,
618                "out_degree mismatch at node {i}"
619            );
620            let in_nbrs: Vec<_> = g.in_neighbors(i).collect();
621            let in_sum: f64 = in_nbrs.iter().map(|&(_, w)| w).sum();
622            assert!(
623                (g.in_degree_of(i) - in_sum).abs() < 1e-12,
624                "in_degree mismatch at node {i}"
625            );
626        }
627    }
628}