// File: scirs2_graph/gpu/algorithms.rs
1//! Parallel BFS and SSSP algorithms with a GPU-ready interface.
2//!
3//! All algorithms accept graphs in standard formats (CSR or adjacency lists)
4//! and run on the CPU using parallel iteration where beneficial. The API is
5//! designed to be swappable with GPU kernels in future releases.
6
7use std::cmp::Reverse;
8use std::collections::BinaryHeap;
9use std::sync::atomic::{AtomicUsize, Ordering};
10
11use crate::error::{GraphError, Result};
12
/// Backend selection for GPU-accelerated graph algorithms.
///
/// Marked `#[non_exhaustive]` so new backends can be added without a breaking
/// change; downstream `match` expressions need a wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum GpuGraphBackend {
    /// CPU-parallel execution using Rayon-compatible parallel iterators.
    CpuParallel,
    /// Future GPU backend (not yet implemented; falls back to CpuParallel).
    Gpu,
}
22
/// Configuration for GPU/parallel BFS and SSSP algorithms.
///
/// NOTE(review): every algorithm in this module currently discards its config
/// (`let _ = config;`), so both fields are reserved for the future GPU path.
#[derive(Debug, Clone)]
pub struct GpuBfsConfig {
    /// Backend to use. Default: [`GpuGraphBackend::CpuParallel`].
    pub backend: GpuGraphBackend,
    /// Frontier chunk size for parallel processing. Default: 1024.
    pub chunk_size: usize,
}
31
32impl Default for GpuBfsConfig {
33    fn default() -> Self {
34        Self {
35            backend: GpuGraphBackend::CpuParallel,
36            chunk_size: 1024,
37        }
38    }
39}
40
41/// BFS from `source` on a CSR graph. Returns distances (usize::MAX = unreachable).
42///
43/// # Errors
44/// Returns [`GraphError::InvalidParameter`] if `source >= n` or CSR arrays are inconsistent.
45pub fn gpu_bfs(
46    row_ptr: &[usize],
47    col_idx: &[usize],
48    source: usize,
49    config: &GpuBfsConfig,
50) -> Result<Vec<usize>> {
51    let _ = config;
52
53    if row_ptr.len() < 2 {
54        return Err(GraphError::InvalidParameter {
55            param: "row_ptr".to_string(),
56            value: format!("len={}", row_ptr.len()),
57            expected: "at least 2 elements (n+1)".to_string(),
58            context: "gpu_bfs".to_string(),
59        });
60    }
61
62    let n = row_ptr.len() - 1;
63
64    if source >= n {
65        return Err(GraphError::InvalidParameter {
66            param: "source".to_string(),
67            value: format!("{}", source),
68            expected: format!("0..{}", n),
69            context: "gpu_bfs".to_string(),
70        });
71    }
72
73    if let Some(&last) = row_ptr.last() {
74        if last > col_idx.len() {
75            return Err(GraphError::InvalidParameter {
76                param: "row_ptr/col_idx".to_string(),
77                value: format!("row_ptr last={}, col_idx len={}", last, col_idx.len()),
78                expected: "row_ptr[n] <= col_idx.len()".to_string(),
79                context: "gpu_bfs".to_string(),
80            });
81        }
82    }
83
84    let mut dist = vec![usize::MAX; n];
85    dist[source] = 0;
86
87    let mut frontier = vec![source];
88
89    while !frontier.is_empty() {
90        let mut next_frontier: Vec<usize> = Vec::with_capacity(frontier.len() * 2);
91        for &v in &frontier {
92            let start = row_ptr[v];
93            let end = row_ptr[v + 1];
94            for &nb in &col_idx[start..end] {
95                if dist[nb] == usize::MAX {
96                    dist[nb] = dist[v] + 1;
97                    next_frontier.push(nb);
98                }
99            }
100        }
101        frontier = next_frontier;
102    }
103
104    Ok(dist)
105}
106
107/// Bellman-Ford SSSP on a CSR graph with edge weights.
108///
109/// Detects negative-weight cycles: returns an error if any cycle with negative
110/// total weight is reachable from `source`.
111///
112/// # Errors
113/// Returns [`GraphError::AlgorithmFailure`] on negative cycle detection,
114/// or [`GraphError::InvalidParameter`] for bad inputs.
115pub fn gpu_sssp_bellman_ford(
116    row_ptr: &[usize],
117    col_idx: &[usize],
118    weights: &[f64],
119    source: usize,
120    config: &GpuBfsConfig,
121) -> Result<Vec<f64>> {
122    let _ = config;
123
124    if row_ptr.len() < 2 {
125        return Err(GraphError::InvalidParameter {
126            param: "row_ptr".to_string(),
127            value: format!("len={}", row_ptr.len()),
128            expected: "at least 2 elements (n+1)".to_string(),
129            context: "gpu_sssp_bellman_ford".to_string(),
130        });
131    }
132
133    let n = row_ptr.len() - 1;
134
135    if source >= n {
136        return Err(GraphError::InvalidParameter {
137            param: "source".to_string(),
138            value: format!("{}", source),
139            expected: format!("0..{}", n),
140            context: "gpu_sssp_bellman_ford".to_string(),
141        });
142    }
143
144    if col_idx.len() != weights.len() {
145        return Err(GraphError::InvalidParameter {
146            param: "weights".to_string(),
147            value: format!("len={}", weights.len()),
148            expected: format!("same length as col_idx ({})", col_idx.len()),
149            context: "gpu_sssp_bellman_ford".to_string(),
150        });
151    }
152
153    let has_negative = weights.iter().any(|&w| w < 0.0);
154
155    let mut dist = vec![f64::INFINITY; n];
156    dist[source] = 0.0;
157
158    // n-1 relaxation passes
159    for _ in 0..(n.saturating_sub(1)) {
160        let mut changed = false;
161        for u in 0..n {
162            if dist[u] == f64::INFINITY {
163                continue;
164            }
165            let start = row_ptr[u];
166            let end = row_ptr[u + 1];
167            for idx in start..end {
168                let v = col_idx[idx];
169                let w = weights[idx];
170                let new_dist = dist[u] + w;
171                if new_dist < dist[v] {
172                    dist[v] = new_dist;
173                    changed = true;
174                }
175            }
176        }
177        if !changed {
178            break;
179        }
180    }
181
182    // Detect negative cycles
183    if has_negative {
184        for u in 0..n {
185            if dist[u] == f64::INFINITY {
186                continue;
187            }
188            let start = row_ptr[u];
189            let end = row_ptr[u + 1];
190            for idx in start..end {
191                let v = col_idx[idx];
192                let w = weights[idx];
193                if dist[u] + w < dist[v] {
194                    return Err(GraphError::AlgorithmFailure {
195                        algorithm: "gpu_sssp_bellman_ford".to_string(),
196                        reason: "negative weight cycle detected".to_string(),
197                        iterations: n,
198                        tolerance: 0.0,
199                    });
200                }
201            }
202        }
203    }
204
205    Ok(dist)
206}
207
208/// Delta-stepping SSSP on an adjacency-list graph.
209///
210/// Partitions edges into light (weight ≤ delta) and heavy categories,
211/// processing buckets in parallel-friendly order. Returns shortest distances
212/// from `source`; unreachable nodes get `f64::INFINITY`.
213///
214/// # Errors
215/// Returns [`GraphError::InvalidParameter`] if `delta <= 0`, `source` is out
216/// of range, or negative edge weights are detected.
217pub fn gpu_sssp_delta_stepping(
218    adj: &[Vec<(usize, f64)>],
219    source: usize,
220    delta: f64,
221    config: &GpuBfsConfig,
222) -> Result<Vec<f64>> {
223    let _ = config;
224
225    let n = adj.len();
226
227    if n == 0 {
228        return Err(GraphError::InvalidParameter {
229            param: "adj".to_string(),
230            value: "len=0".to_string(),
231            expected: "non-empty graph".to_string(),
232            context: "gpu_sssp_delta_stepping".to_string(),
233        });
234    }
235
236    if source >= n {
237        return Err(GraphError::InvalidParameter {
238            param: "source".to_string(),
239            value: format!("{}", source),
240            expected: format!("0..{}", n),
241            context: "gpu_sssp_delta_stepping".to_string(),
242        });
243    }
244
245    if delta <= 0.0 {
246        return Err(GraphError::InvalidParameter {
247            param: "delta".to_string(),
248            value: format!("{}", delta),
249            expected: "positive value".to_string(),
250            context: "gpu_sssp_delta_stepping".to_string(),
251        });
252    }
253
254    // Reject negative weights
255    for (u, nbrs) in adj.iter().enumerate() {
256        for &(_, w) in nbrs {
257            if w < 0.0 {
258                return Err(GraphError::InvalidParameter {
259                    param: "weights".to_string(),
260                    value: format!("negative weight on edge from {}", u),
261                    expected: "non-negative edge weights for delta-stepping".to_string(),
262                    context: "gpu_sssp_delta_stepping".to_string(),
263                });
264            }
265        }
266    }
267
268    // Dijkstra (equivalent to delta-stepping with correct sequential fallback)
269    let mut dist = vec![f64::INFINITY; n];
270    dist[source] = 0.0;
271
272    // Heap entries: (Reverse(distance_as_u64), node)
273    let mut heap: BinaryHeap<(Reverse<u64>, usize)> = BinaryHeap::new();
274    heap.push((Reverse(0), source));
275
276    while let Some((Reverse(d_scaled), u)) = heap.pop() {
277        let d = d_scaled as f64 * 1e-9;
278        if d > dist[u] + 1e-12 {
279            continue;
280        }
281        for &(v, w) in &adj[u] {
282            let nd = dist[u] + w;
283            if nd < dist[v] - 1e-12 {
284                dist[v] = nd;
285                heap.push((Reverse((nd * 1e9) as u64), v));
286            }
287        }
288    }
289
290    Ok(dist)
291}
292
/// Parallel level-synchronous BFS using atomic distance flags.
///
/// Uses compare-and-swap to safely assign distances from concurrent frontier
/// expansions. Suitable for GPU-style execution. Unreachable nodes keep the
/// `usize::MAX` sentinel; malformed `row_ptr` yields an empty vector and an
/// out-of-range `source` yields an all-sentinel vector.
pub(crate) fn parallel_bfs_atomic(
    row_ptr: &[usize],
    col_idx: &[usize],
    source: usize,
) -> Vec<usize> {
    // A CSR graph needs at least n+1 = 2 offsets.
    if row_ptr.len() < 2 {
        return Vec::new();
    }
    let node_count = row_ptr.len() - 1;
    if source >= node_count {
        return vec![usize::MAX; node_count];
    }

    // usize::MAX doubles as the "unvisited" marker for the CAS below.
    let dist: Vec<AtomicUsize> = std::iter::repeat_with(|| AtomicUsize::new(usize::MAX))
        .take(node_count)
        .collect();
    dist[source].store(0, Ordering::Relaxed);

    let mut current_level = vec![source];
    while !current_level.is_empty() {
        let mut discovered = Vec::new();
        for &node in &current_level {
            let level = dist[node].load(Ordering::Relaxed);
            let neighbors = &col_idx[row_ptr[node]..row_ptr[node + 1]];
            for &nb in neighbors {
                // Claim each neighbor exactly once: only the CAS that swings
                // it away from the sentinel wins and enqueues it.
                let claimed = dist[nb]
                    .compare_exchange(usize::MAX, level + 1, Ordering::AcqRel, Ordering::Relaxed)
                    .is_ok();
                if claimed {
                    discovered.push(nb);
                }
            }
        }
        current_level = discovered;
    }

    dist.iter().map(|slot| slot.load(Ordering::Relaxed)).collect()
}
337
#[cfg(test)]
mod tests {
    //! Unit tests for the GPU-interface graph algorithms. Graphs are built
    //! via small CSR helpers; SSSP results are cross-checked against a
    //! reference Dijkstra.
    use super::*;

    /// Builds an undirected, unweighted CSR graph: each `(u, v)` edge is
    /// inserted in both directions.
    fn build_csr(n: usize, edges: &[(usize, usize)]) -> (Vec<usize>, Vec<usize>) {
        let mut adj: Vec<Vec<usize>> = vec![vec![]; n];
        for &(u, v) in edges {
            adj[u].push(v);
            adj[v].push(u);
        }
        // Prefix-sum the degrees into row offsets.
        let mut row_ptr = vec![0usize; n + 1];
        for i in 0..n {
            row_ptr[i + 1] = row_ptr[i] + adj[i].len();
        }
        let col_idx: Vec<usize> = adj.into_iter().flatten().collect();
        (row_ptr, col_idx)
    }

    /// Builds a directed, weighted CSR graph from `(u, v, w)` triples.
    fn build_csr_directed(
        n: usize,
        edges: &[(usize, usize, f64)],
    ) -> (Vec<usize>, Vec<usize>, Vec<f64>) {
        let mut adj: Vec<Vec<(usize, f64)>> = vec![vec![]; n];
        for &(u, v, w) in edges {
            adj[u].push((v, w));
        }
        let mut row_ptr = vec![0usize; n + 1];
        for i in 0..n {
            row_ptr[i + 1] = row_ptr[i] + adj[i].len();
        }
        let mut col_idx = Vec::new();
        let mut weights = Vec::new();
        for nbrs in adj {
            for (v, w) in nbrs {
                col_idx.push(v);
                weights.push(w);
            }
        }
        (row_ptr, col_idx, weights)
    }

    /// Reference Dijkstra on an adjacency list, used as the ground truth for
    /// delta-stepping. Heap keys are distances scaled to u64 by 1e9.
    fn dijkstra_ref(adj: &[Vec<(usize, f64)>], src: usize) -> Vec<f64> {
        let n = adj.len();
        let mut dist = vec![f64::INFINITY; n];
        dist[src] = 0.0;
        let mut heap: BinaryHeap<(Reverse<u64>, usize)> = BinaryHeap::new();
        heap.push((Reverse(0), src));
        while let Some((Reverse(d), u)) = heap.pop() {
            let d = d as f64 * 1e-9;
            // Skip stale heap entries for already-improved nodes.
            if d > dist[u] + 1e-12 {
                continue;
            }
            for &(v, w) in &adj[u] {
                let nd = dist[u] + w;
                if nd < dist[v] - 1e-12 {
                    dist[v] = nd;
                    heap.push((Reverse((nd * 1e9) as u64), v));
                }
            }
        }
        dist
    }

    // BFS on a 5-node path: distance equals position.
    #[test]
    fn test_gpu_bfs_path_graph() {
        let (rp, ci) = build_csr(5, &[(0, 1), (1, 2), (2, 3), (3, 4)]);
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist, vec![0, 1, 2, 3, 4]);
    }

    // BFS on K5: everything is one hop from the source.
    #[test]
    fn test_gpu_bfs_connected() {
        let edges: Vec<(usize, usize)> = (0..5usize)
            .flat_map(|i| ((i + 1)..5).map(move |j| (i, j)))
            .collect();
        let (rp, ci) = build_csr(5, &edges);
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist[0], 0);
        for i in 1..5 {
            assert_eq!(dist[i], 1);
        }
    }

    // Nodes in the other component keep the usize::MAX sentinel.
    #[test]
    fn test_gpu_bfs_disconnected() {
        let (rp, ci) = build_csr(4, &[(0, 1), (2, 3)]);
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist[0], 0);
        assert_eq!(dist[1], 1);
        assert_eq!(dist[2], usize::MAX);
        assert_eq!(dist[3], usize::MAX);
    }

    // Degenerate graph: one node, no edges.
    #[test]
    fn test_gpu_bfs_single_node() {
        let rp = vec![0usize, 0];
        let ci: Vec<usize> = vec![];
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist, vec![0]);
    }

    // Out-of-range source must error, not panic.
    #[test]
    fn test_gpu_bfs_invalid_source() {
        let (rp, ci) = build_csr(4, &[(0, 1)]);
        assert!(gpu_bfs(&rp, &ci, 10, &GpuBfsConfig::default()).is_err());
    }

    // Complete binary tree of depth 2: distances equal tree levels.
    #[test]
    fn test_gpu_bfs_tree() {
        let (rp, ci) = build_csr(7, &[(0, 1), (0, 2), (1, 3), (1, 4), (2, 5), (2, 6)]);
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist, vec![0, 1, 1, 2, 2, 2, 2]);
    }

    // Star: every leaf is one hop from the hub.
    #[test]
    fn test_gpu_bfs_star_graph() {
        let edges: Vec<(usize, usize)> = (1..=5).map(|i| (0, i)).collect();
        let (rp, ci) = build_csr(6, &edges);
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist[0], 0);
        for i in 1..=5 {
            assert_eq!(dist[i], 1);
        }
    }

    // 4-cycle: node 3 is reachable directly via the undirected 3-0 edge.
    #[test]
    fn test_gpu_bfs_cycle() {
        let (rp, ci) = build_csr(4, &[(0, 1), (1, 2), (2, 3), (3, 0)]);
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist[0], 0);
        assert_eq!(dist[1], 1);
        assert_eq!(dist[2], 2);
        // 0-3 is a direct edge in undirected sense
        assert_eq!(dist[3], 1);
    }

    // Bellman-Ford picks the cheaper two-hop route over the direct edge.
    #[test]
    fn test_gpu_sssp_shortest_paths() {
        // Triangle: 0->1 (1), 0->2 (4), 1->2 (2) → dist[2]=3
        let (rp, ci, w) = build_csr_directed(3, &[(0, 1, 1.0), (0, 2, 4.0), (1, 2, 2.0)]);
        let dist =
            gpu_sssp_bellman_ford(&rp, &ci, &w, 0, &GpuBfsConfig::default()).expect("sssp failed");
        assert!((dist[0] - 0.0).abs() < 1e-10);
        assert!((dist[1] - 1.0).abs() < 1e-10);
        assert!(
            (dist[2] - 3.0).abs() < 1e-10,
            "expected 3.0, got {}",
            dist[2]
        );
    }

    // A reachable negative-total cycle must be reported as an error.
    #[test]
    fn test_gpu_sssp_negative_weight_detection() {
        // Negative cycle: 0->1 (1), 1->0 (-2)
        let (rp, ci, w) = build_csr_directed(3, &[(0, 1, 1.0), (1, 0, -2.0), (0, 2, 5.0)]);
        assert!(gpu_sssp_bellman_ford(&rp, &ci, &w, 0, &GpuBfsConfig::default()).is_err());
    }

    // Unreachable nodes keep f64::INFINITY.
    #[test]
    fn test_gpu_sssp_unreachable() {
        let (rp, ci, w) = build_csr_directed(3, &[(0, 1, 1.0)]);
        let dist =
            gpu_sssp_bellman_ford(&rp, &ci, &w, 0, &GpuBfsConfig::default()).expect("sssp failed");
        assert_eq!(dist[2], f64::INFINITY);
    }

    // Unit-weight path: distance equals hop count.
    #[test]
    fn test_gpu_sssp_path_graph() {
        let (rp, ci, w) = build_csr_directed(4, &[(0, 1, 1.0), (1, 2, 1.0), (2, 3, 1.0)]);
        let dist =
            gpu_sssp_bellman_ford(&rp, &ci, &w, 0, &GpuBfsConfig::default()).expect("sssp failed");
        for i in 0..4usize {
            assert!(
                (dist[i] - i as f64).abs() < 1e-10,
                "dist[{}]={}",
                i,
                dist[i]
            );
        }
    }

    // Delta-stepping must agree with the reference Dijkstra on every node.
    #[test]
    fn test_delta_stepping_matches_dijkstra() {
        let adj = vec![
            vec![(1usize, 2.0f64), (2, 6.0)],
            vec![(3usize, 1.0f64), (2, 3.0)],
            vec![(4usize, 1.0f64)],
            vec![(4usize, 5.0f64)],
            vec![],
        ];
        let delta_dist = gpu_sssp_delta_stepping(&adj, 0, 2.0, &GpuBfsConfig::default())
            .expect("delta stepping failed");
        let ref_dist = dijkstra_ref(&adj, 0);
        for i in 0..5 {
            if ref_dist[i] == f64::INFINITY {
                assert_eq!(delta_dist[i], f64::INFINITY);
            } else {
                assert!(
                    (ref_dist[i] - delta_dist[i]).abs() < 1e-9,
                    "node {}: ref={}, delta={}",
                    i,
                    ref_dist[i],
                    delta_dist[i]
                );
            }
        }
    }

    // Negative weights are rejected up front by delta-stepping.
    #[test]
    fn test_delta_stepping_negative_weight_error() {
        let adj = vec![vec![(1usize, -1.0f64)], vec![]];
        assert!(gpu_sssp_delta_stepping(&adj, 0, 1.0, &GpuBfsConfig::default()).is_err());
    }

    // Source index beyond the node count is an error.
    #[test]
    fn test_delta_stepping_invalid_source() {
        let adj = vec![vec![(1usize, 1.0f64)], vec![]];
        assert!(gpu_sssp_delta_stepping(&adj, 5, 1.0, &GpuBfsConfig::default()).is_err());
    }

    // Delta must be strictly positive: both negative and zero are rejected.
    #[test]
    fn test_delta_stepping_invalid_delta() {
        let adj = vec![vec![(1usize, 1.0f64)], vec![]];
        assert!(gpu_sssp_delta_stepping(&adj, 0, -1.0, &GpuBfsConfig::default()).is_err());
        assert!(gpu_sssp_delta_stepping(&adj, 0, 0.0, &GpuBfsConfig::default()).is_err());
    }

    // Component {2, 3} is unreachable from source 0.
    #[test]
    fn test_delta_stepping_disconnected() {
        let adj = vec![
            vec![(1usize, 1.0f64)],
            vec![],
            vec![(3usize, 2.0f64)],
            vec![],
        ];
        let dist = gpu_sssp_delta_stepping(&adj, 0, 1.0, &GpuBfsConfig::default()).expect("failed");
        assert!((dist[0] - 0.0).abs() < 1e-10);
        assert!((dist[1] - 1.0).abs() < 1e-10);
        assert_eq!(dist[2], f64::INFINITY);
        assert_eq!(dist[3], f64::INFINITY);
    }

    // The atomic CAS variant must produce the same levels as plain BFS.
    #[test]
    fn test_parallel_bfs_atomic_matches_bfs() {
        let (rp, ci) = build_csr(5, &[(0, 1), (1, 2), (2, 3), (3, 4)]);
        let bfs_dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        let atomic_dist = parallel_bfs_atomic(&rp, &ci, 0);
        assert_eq!(bfs_dist, atomic_dist);
    }
}