graphops 0.2.1

Graph operators: PageRank/PPR/walks/reachability/node2vec/betweenness.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
//! PageRank centrality (power iteration on a random-surfer Markov chain).

use crate::graph::{Graph, WeightedGraph};
use crate::{Error, Result};

/// Result of a PageRank run: scores plus convergence diagnostics.
///
/// The `*_run` variants (`pagerank_run`, `pagerank_weighted_run`,
/// `personalized_pagerank_run`) return this struct; the shorter variants
/// (`pagerank`, etc.) discard the diagnostics and return only `scores`.
///
/// Use the diagnostic fields when tuning `max_iterations` / `tolerance` or
/// when bench-marking convergence across graphs of different sizes.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct PageRankRun {
    /// PageRank score per node, indexed by node id. Sums to approximately
    /// `1.0` modulo floating-point and dangling-node redistribution.
    pub scores: Vec<f64>,
    /// Number of power-iteration steps executed. Equal to
    /// `config.max_iterations` when `converged == false`.
    pub iterations: usize,
    /// L1 norm of the score delta at the final iteration. Compare against
    /// `config.tolerance` to see how close we were to converging.
    pub diff_l1: f64,
    /// Whether the run converged (`diff_l1 < config.tolerance`) before hitting
    /// `config.max_iterations`.
    pub converged: bool,
}

/// PageRank hyperparameters.
///
/// Defaults match the Brin–Page 1998 paper: `damping = 0.85`,
/// `max_iterations = 100`, `tolerance = 1e-6`. For high-precision downstream
/// work (personalized PageRank as a similarity metric) drop tolerance to
/// `1e-9` and bump iterations to `500`.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct PageRankConfig {
    /// Damping factor `alpha` in `[0, 1]`: the probability of following an
    /// edge at each step (vs teleporting uniformly). Classical value is `0.85`.
    pub damping: f64,
    /// Maximum power-iteration steps before giving up. The run returns
    /// with `converged = false` if the L1 delta is still above `tolerance`.
    pub max_iterations: usize,
    /// Convergence threshold on the L1 norm of the score delta between
    /// consecutive iterations. Lower = tighter convergence, more iterations.
    pub tolerance: f64,
}

impl Default for PageRankConfig {
    fn default() -> Self {
        Self {
            damping: 0.85,
            max_iterations: 100,
            tolerance: 1e-6,
        }
    }
}

impl PageRankConfig {
    /// Validate hyperparameters; returns `Error::InvalidParameter` on failure.
    pub fn validate(&self) -> Result<()> {
        if !self.damping.is_finite() {
            return Err(Error::InvalidParameter(
                "damping must be finite".to_string(),
            ));
        }
        if !(0.0..=1.0).contains(&self.damping) {
            return Err(Error::InvalidParameter(
                "damping must be in [0,1]".to_string(),
            ));
        }
        if self.max_iterations == 0 {
            return Err(Error::InvalidParameter(
                "max_iterations must be > 0".to_string(),
            ));
        }
        if !self.tolerance.is_finite() || self.tolerance <= 0.0 {
            return Err(Error::InvalidParameter(
                "tolerance must be finite and > 0".to_string(),
            ));
        }
        Ok(())
    }
}

/// Checked PageRank centrality.
///
/// This validates `config` and rejects obviously-invalid numeric settings.
pub fn pagerank_checked<G: Graph>(graph: &G, config: PageRankConfig) -> Result<Vec<f64>> {
    config.validate()?;
    Ok(pagerank(graph, config))
}

/// Compute PageRank scores for all nodes.
///
/// # Examples
///
/// ```
/// use graphops::{pagerank, AdjacencyMatrix, PageRankConfig};
///
/// // 3-node directed graph: 0->1, 1->2, 2->0
/// let adj = vec![
///     vec![0.0, 1.0, 0.0],
///     vec![0.0, 0.0, 1.0],
///     vec![1.0, 0.0, 0.0],
/// ];
/// let scores = pagerank(&AdjacencyMatrix(&adj), PageRankConfig::default());
/// assert_eq!(scores.len(), 3);
/// let total: f64 = scores.iter().sum();
/// assert!((total - 1.0).abs() < 1e-6);
/// ```
pub fn pagerank<G: Graph>(graph: &G, config: PageRankConfig) -> Vec<f64> {
    pagerank_run(graph, config).scores
}

/// PageRank with convergence reporting.
///
/// `iterations` is the number of update steps performed.
/// `diff_l1` is the final \(L_1\) residual (sum of absolute deltas).
pub fn pagerank_run<G: Graph>(graph: &G, config: PageRankConfig) -> PageRankRun {
    let n = graph.node_count();
    if n == 0 {
        return PageRankRun {
            scores: Vec::new(),
            iterations: 0,
            diff_l1: 0.0,
            converged: true,
        };
    }
    let n_f64 = n as f64;
    let mut scores = vec![1.0 / n_f64; n];
    let mut new_scores = vec![0.0; n];
    // Precompute neighbor lists once to avoid per-iteration allocation.
    let neighbors: Vec<Vec<usize>> = (0..n).map(|u| graph.neighbors(u)).collect();
    let out_degrees: Vec<usize> = neighbors.iter().map(|nb| nb.len()).collect();

    let mut iters = 0usize;
    let mut last_diff = f64::INFINITY;
    let mut converged = false;
    for _ in 0..config.max_iterations {
        iters += 1;
        let dangling_sum: f64 = out_degrees
            .iter()
            .enumerate()
            .filter(|(_, &deg)| deg == 0)
            .map(|(i, _)| scores[i])
            .sum();
        let dangling_contrib = config.damping * dangling_sum / n_f64;
        let teleport = (1.0 - config.damping) / n_f64;
        new_scores.fill(teleport + dangling_contrib);

        for u in 0..n {
            let deg = out_degrees[u];
            if deg > 0 {
                let share = config.damping * scores[u] / deg as f64;
                for &v in &neighbors[u] {
                    new_scores[v] += share;
                }
            }
        }

        let diff: f64 = scores
            .iter()
            .zip(new_scores.iter())
            .map(|(old, new)| (old - new).abs())
            .sum();
        last_diff = diff;
        std::mem::swap(&mut scores, &mut new_scores);
        if diff < config.tolerance {
            converged = true;
            break;
        }
    }
    PageRankRun {
        scores,
        iterations: iters,
        diff_l1: last_diff,
        converged,
    }
}

/// Weighted PageRank centrality.
///
/// Edges are treated as having non-negative weights, and a node's outgoing mass is split
/// proportionally to outgoing edge weights.
///
/// This matches the Markov chain transition:
/// \[
///   P(u \to v) = \frac{w(u,v)}{\sum_x w(u,x)}
/// \]
///
/// Note: `pagerank_weighted` clamps negative weights to 0.0 for robustness. If you need strict
/// validation (no negatives/NaNs), use `pagerank_weighted_checked`.
pub fn pagerank_weighted<G: WeightedGraph>(graph: &G, config: PageRankConfig) -> Vec<f64> {
    pagerank_weighted_run(graph, config).scores
}

/// Like [`pagerank_weighted`] but returns full [`PageRankRun`] diagnostics.
///
/// Prefer this when you need to know whether the run converged
/// (`result.converged`) or how close it got (`result.diff_l1`). The scalar
/// `pagerank_weighted` wrapper discards everything but `scores`.
pub fn pagerank_weighted_run<G: WeightedGraph>(graph: &G, config: PageRankConfig) -> PageRankRun {
    let n = graph.node_count();
    if n == 0 {
        return PageRankRun {
            scores: Vec::new(),
            iterations: 0,
            diff_l1: 0.0,
            converged: true,
        };
    }

    let n_f64 = n as f64;
    let mut scores = vec![1.0 / n_f64; n];
    let mut new_scores = vec![0.0; n];

    // Precompute outgoing neighbors once (Graph) and outgoing weight sums (WeightedGraph).
    let neighbors: Vec<Vec<usize>> = (0..n).map(|u| graph.neighbors(u)).collect();
    let out_wsum: Vec<f64> = (0..n)
        .map(|u| {
            neighbors[u]
                .iter()
                .map(|&v| graph.edge_weight(u, v).max(0.0))
                .sum()
        })
        .collect();

    let mut iters = 0usize;
    let mut last_diff = f64::INFINITY;
    let mut converged = false;
    for _ in 0..config.max_iterations {
        iters += 1;
        let dangling_sum: f64 = out_wsum
            .iter()
            .enumerate()
            .filter(|(_, &ws)| ws == 0.0)
            .map(|(i, _)| scores[i])
            .sum();

        let dangling_contrib = config.damping * dangling_sum / n_f64;
        let teleport = (1.0 - config.damping) / n_f64;
        new_scores.fill(teleport + dangling_contrib);

        for u in 0..n {
            let ws = out_wsum[u];
            if ws > 0.0 {
                // distribute along outgoing edges proportional to weight
                for &v in &neighbors[u] {
                    let w = graph.edge_weight(u, v).max(0.0);
                    if w > 0.0 {
                        new_scores[v] += config.damping * scores[u] * (w / ws);
                    }
                }
            }
        }

        let diff: f64 = scores
            .iter()
            .zip(new_scores.iter())
            .map(|(old, new)| (old - new).abs())
            .sum();
        last_diff = diff;
        std::mem::swap(&mut scores, &mut new_scores);
        if diff < config.tolerance {
            converged = true;
            break;
        }
    }

    PageRankRun {
        scores,
        iterations: iters,
        diff_l1: last_diff,
        converged,
    }
}

/// Checked weighted PageRank.
///
/// Rejects NaN/negative edge weights (instead of silently clamping them to 0.0).
pub fn pagerank_weighted_checked<G: WeightedGraph>(
    graph: &G,
    config: PageRankConfig,
) -> Result<Vec<f64>> {
    config.validate()?;
    let n = graph.node_count();
    for u in 0..n {
        for v in graph.neighbors(u) {
            let w = graph.edge_weight(u, v);
            if !w.is_finite() {
                return Err(Error::InvalidParameter(
                    "edge weights must be finite".to_string(),
                ));
            }
            if w < 0.0 {
                return Err(Error::InvalidParameter(
                    "edge weights must be non-negative".to_string(),
                ));
            }
        }
    }
    Ok(pagerank_weighted(graph, config))
}

/// Validated PageRank with full diagnostics. Errors on invalid `config` and
/// returns a [`PageRankRun`] on success. Equivalent to calling
/// [`PageRankConfig::validate`] then [`pagerank_run`].
pub fn pagerank_checked_run<G: Graph>(graph: &G, config: PageRankConfig) -> Result<PageRankRun> {
    config.validate()?;
    Ok(pagerank_run(graph, config))
}

/// Validated weighted PageRank with full diagnostics. Errors on invalid config,
/// NaN weights, or negative weights. Returns a [`PageRankRun`] on success.
pub fn pagerank_weighted_checked_run<G: WeightedGraph>(
    graph: &G,
    config: PageRankConfig,
) -> Result<PageRankRun> {
    // Validate config + weights by calling the existing checked wrapper first.
    // Then re-run with convergence reporting (no additional validation needed).
    let _ = pagerank_weighted_checked(graph, config)?;
    Ok(pagerank_weighted_run(graph, config))
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::AdjacencyMatrix;
    use proptest::prelude::*;

    #[test]
    fn test_pagerank_weighted_sums_to_one() {
        // 3 nodes, weighted edges:
        // 0 -> 1 (2.0), 0 -> 2 (1.0)
        // 1 -> 2 (1.0)
        // 2 -> (dangling)
        let adj = vec![
            vec![0.0, 2.0, 1.0],
            vec![0.0, 0.0, 1.0],
            vec![0.0, 0.0, 0.0],
        ];
        let g = AdjacencyMatrix(&adj);
        let scores = pagerank_weighted(&g, PageRankConfig::default());
        let total: f64 = scores.iter().sum();
        assert!((total - 1.0).abs() < 1e-6, "sum={total}");
    }

    #[test]
    fn test_pagerank_weight_biases_toward_heavier_edge() {
        // 0 links to 1 twice as strongly as to 2, so 1 should rank >= 2.
        let adj = vec![
            vec![0.0, 2.0, 1.0],
            vec![0.0, 0.0, 0.0],
            vec![0.0, 0.0, 0.0],
        ];
        let g = AdjacencyMatrix(&adj);
        let scores = pagerank_weighted(&g, PageRankConfig::default());
        assert!(
            scores[1] >= scores[2],
            "scores[1]={} scores[2]={}",
            scores[1],
            scores[2]
        );
    }

    proptest! {
        #[test]
        fn prop_pagerank_unweighted_sums_to_one(n in 1usize..10, edges in proptest::collection::vec((0usize..10, 0usize..10), 0..40)) {
            // Build unweighted adjacency matrix with 0/1 entries.
            let mut adj = vec![vec![0.0_f64; n]; n];
            for (u,v) in edges {
                if u < n && v < n && u != v {
                    adj[u][v] = 1.0;
                }
            }
            let g = AdjacencyMatrix(&adj);
            let scores = pagerank_checked(&g, PageRankConfig::default()).unwrap();
            prop_assert_eq!(scores.len(), n);
            let sum: f64 = scores.iter().sum();
            prop_assert!((sum - 1.0).abs() < 1e-6, "sum={}", sum);
            prop_assert!(scores.iter().all(|x| *x >= -1e-12));
        }

        #[test]
        fn prop_pagerank_weighted_checked_rejects_negative(n in 2usize..10, u in 0usize..10, v in 0usize..10) {
            #[derive(Debug)]
            struct TestWeighted {
                neighbors: Vec<Vec<usize>>,
                w: Vec<Vec<f64>>,
            }
            impl crate::graph::Graph for TestWeighted {
                fn node_count(&self) -> usize { self.neighbors.len() }
                fn neighbors(&self, node: usize) -> Vec<usize> { self.neighbors[node].clone() }
            }
            impl crate::graph::WeightedGraph for TestWeighted {
                fn edge_weight(&self, source: usize, target: usize) -> f64 { self.w[source][target] }
            }

            let u = u % n;
            let mut v = v % n;
            if v == u { v = (v + 1) % n; }

            let mut neighbors = vec![Vec::<usize>::new(); n];
            let mut w = vec![vec![0.0_f64; n]; n];
            neighbors[u].push(v);
            w[u][v] = -1.0;

            let g = TestWeighted { neighbors, w };
            let res = pagerank_weighted_checked(&g, PageRankConfig::default());
            prop_assert!(res.is_err());
        }
    }
}