//! CPU reference implementations of GPU-targeted graph kernels: BFS and
//! single-source shortest paths (Bellman-Ford and a delta-stepping entry
//! point) over CSR and adjacency-list inputs.

use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::sync::atomic::{AtomicUsize, Ordering};

use crate::error::{GraphError, Result};

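/// Backend selection for the graph kernels in this module.
///
/// `Gpu` is reserved for a device implementation; every entry point below
/// currently ignores the configured backend and runs on the CPU.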
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum GpuGraphBackend {
    CpuParallel,
    Gpu,
}

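/// Tuning parameters shared by the BFS and SSSP entry points.
///
/// `chunk_size` is presumably a frontier-chunking knob for the eventual
/// device path; the current CPU implementations accept the config but
/// never read either field.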
#[derive(Debug, Clone)]
pub struct GpuBfsConfig {
    pub backend: GpuGraphBackend,
    pub chunk_size: usize,
}

impl Default for GpuBfsConfig {
    fn default() -> Self {
        Self {
            backend: GpuGraphBackend::CpuParallel,
            chunk_size: 1024,
        }
    }
}

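/// Breadth-first search over a CSR graph, one frontier level at a time.
///
/// Returns the hop distance from `source` to every vertex, with
/// `usize::MAX` marking unreachable vertices. `row_ptr` must hold `n + 1`
/// offsets with `row_ptr[n] <= col_idx.len()`, and `source` must be a valid
/// vertex id; violations return `GraphError::InvalidParameter`.
///
/// A minimal sketch of the expected input/output shape (marked `ignore`
/// since the `use` path depends on the crate layout):
///
/// ```ignore
/// // Undirected path 0 - 1 - 2 in CSR form.
/// let row_ptr = vec![0, 1, 3, 4];
/// let col_idx = vec![1, 0, 2, 1];
/// let dist = gpu_bfs(&row_ptr, &col_idx, 0, &GpuBfsConfig::default()).unwrap();
/// assert_eq!(dist, vec![0, 1, 2]);
/// ```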
pub fn gpu_bfs(
    row_ptr: &[usize],
    col_idx: &[usize],
    source: usize,
    config: &GpuBfsConfig,
) -> Result<Vec<usize>> {
    // The config is accepted for API stability; this CPU path ignores it.
    let _ = config;

    if row_ptr.len() < 2 {
        return Err(GraphError::InvalidParameter {
            param: "row_ptr".to_string(),
            value: format!("len={}", row_ptr.len()),
            expected: "at least 2 elements (n+1)".to_string(),
            context: "gpu_bfs".to_string(),
        });
    }

    let n = row_ptr.len() - 1;

    if source >= n {
        return Err(GraphError::InvalidParameter {
            param: "source".to_string(),
            value: format!("{}", source),
            expected: format!("0..{}", n),
            context: "gpu_bfs".to_string(),
        });
    }

    if let Some(&last) = row_ptr.last() {
        if last > col_idx.len() {
            return Err(GraphError::InvalidParameter {
                param: "row_ptr/col_idx".to_string(),
                value: format!("row_ptr last={}, col_idx len={}", last, col_idx.len()),
                expected: "row_ptr[n] <= col_idx.len()".to_string(),
                context: "gpu_bfs".to_string(),
            });
        }
    }

    let mut dist = vec![usize::MAX; n];
    dist[source] = 0;

    let mut frontier = vec![source];

    // Level-synchronous expansion: each pass visits the current frontier and
    // collects its unvisited neighbors as the next one.
    while !frontier.is_empty() {
        let mut next_frontier: Vec<usize> = Vec::with_capacity(frontier.len() * 2);
        for &v in &frontier {
            let start = row_ptr[v];
            let end = row_ptr[v + 1];
            for &nb in &col_idx[start..end] {
                if dist[nb] == usize::MAX {
                    dist[nb] = dist[v] + 1;
                    next_frontier.push(nb);
                }
            }
        }
        frontier = next_frontier;
    }

    Ok(dist)
}

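/// Bellman-Ford single-source shortest paths over a weighted CSR graph.
///
/// Returns the distance from `source` to every vertex, with `f64::INFINITY`
/// marking unreachable vertices. Negative edge weights are allowed; a
/// reachable negative-weight cycle is reported as
/// `GraphError::AlgorithmFailure`. Relaxation stops early once a full pass
/// makes no change, so the worst case is O(n * m) but well-behaved inputs
/// finish sooner.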
pub fn gpu_sssp_bellman_ford(
    row_ptr: &[usize],
    col_idx: &[usize],
    weights: &[f64],
    source: usize,
    config: &GpuBfsConfig,
) -> Result<Vec<f64>> {
    let _ = config;

    if row_ptr.len() < 2 {
        return Err(GraphError::InvalidParameter {
            param: "row_ptr".to_string(),
            value: format!("len={}", row_ptr.len()),
            expected: "at least 2 elements (n+1)".to_string(),
            context: "gpu_sssp_bellman_ford".to_string(),
        });
    }

    let n = row_ptr.len() - 1;

    if source >= n {
        return Err(GraphError::InvalidParameter {
            param: "source".to_string(),
            value: format!("{}", source),
            expected: format!("0..{}", n),
            context: "gpu_sssp_bellman_ford".to_string(),
        });
    }

    if col_idx.len() != weights.len() {
        return Err(GraphError::InvalidParameter {
            param: "weights".to_string(),
            value: format!("len={}", weights.len()),
            expected: format!("same length as col_idx ({})", col_idx.len()),
            context: "gpu_sssp_bellman_ford".to_string(),
        });
    }

    // Mirrors the CSR consistency check in `gpu_bfs` so malformed offsets
    // surface as an error rather than an out-of-bounds panic.
    if let Some(&last) = row_ptr.last() {
        if last > col_idx.len() {
            return Err(GraphError::InvalidParameter {
                param: "row_ptr/col_idx".to_string(),
                value: format!("row_ptr last={}, col_idx len={}", last, col_idx.len()),
                expected: "row_ptr[n] <= col_idx.len()".to_string(),
                context: "gpu_sssp_bellman_ford".to_string(),
            });
        }
    }

    let has_negative = weights.iter().any(|&w| w < 0.0);

    let mut dist = vec![f64::INFINITY; n];
    dist[source] = 0.0;

    // Standard Bellman-Ford: at most n-1 relaxation passes, stopping early
    // once a full pass changes nothing.
    for _ in 0..(n.saturating_sub(1)) {
        let mut changed = false;
        for u in 0..n {
            if dist[u] == f64::INFINITY {
                continue;
            }
            let start = row_ptr[u];
            let end = row_ptr[u + 1];
            for idx in start..end {
                let v = col_idx[idx];
                let w = weights[idx];
                let new_dist = dist[u] + w;
                if new_dist < dist[v] {
                    dist[v] = new_dist;
                    changed = true;
                }
            }
        }
        if !changed {
            break;
        }
    }

    // One extra pass detects negative cycles; it is only needed (and only
    // run) when some edge weight is negative.
    if has_negative {
        for u in 0..n {
            if dist[u] == f64::INFINITY {
                continue;
            }
            let start = row_ptr[u];
            let end = row_ptr[u + 1];
            for idx in start..end {
                let v = col_idx[idx];
                let w = weights[idx];
                if dist[u] + w < dist[v] {
                    return Err(GraphError::AlgorithmFailure {
                        algorithm: "gpu_sssp_bellman_ford".to_string(),
                        reason: "negative weight cycle detected".to_string(),
                        iterations: n,
                        tolerance: 0.0,
                    });
                }
            }
        }
    }

    Ok(dist)
}

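/// Single-source shortest paths on an adjacency-list graph with
/// non-negative edge weights.
///
/// Despite the name, this CPU fallback runs a binary-heap Dijkstra rather
/// than bucketed delta-stepping: `delta` is validated (it must be positive)
/// but does not influence the traversal. Heap keys are distances scaled to
/// `u64` fixed point at 1e-9 resolution, which assumes path lengths stay
/// well below ~1.8e10.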
pub fn gpu_sssp_delta_stepping(
    adj: &[Vec<(usize, f64)>],
    source: usize,
    delta: f64,
    config: &GpuBfsConfig,
) -> Result<Vec<f64>> {
    let _ = config;

    let n = adj.len();

    if n == 0 {
        return Err(GraphError::InvalidParameter {
            param: "adj".to_string(),
            value: "len=0".to_string(),
            expected: "non-empty graph".to_string(),
            context: "gpu_sssp_delta_stepping".to_string(),
        });
    }

    if source >= n {
        return Err(GraphError::InvalidParameter {
            param: "source".to_string(),
            value: format!("{}", source),
            expected: format!("0..{}", n),
            context: "gpu_sssp_delta_stepping".to_string(),
        });
    }

    if delta <= 0.0 {
        return Err(GraphError::InvalidParameter {
            param: "delta".to_string(),
            value: format!("{}", delta),
            expected: "positive value".to_string(),
            context: "gpu_sssp_delta_stepping".to_string(),
        });
    }

    // Delta-stepping (and this Dijkstra fallback) requires non-negative
    // weights, so reject any negative edge up front.
    for (u, nbrs) in adj.iter().enumerate() {
        for &(_, w) in nbrs {
            if w < 0.0 {
                return Err(GraphError::InvalidParameter {
                    param: "weights".to_string(),
                    value: format!("negative weight on edge from {}", u),
                    expected: "non-negative edge weights for delta-stepping".to_string(),
                    context: "gpu_sssp_delta_stepping".to_string(),
                });
            }
        }
    }

    let mut dist = vec![f64::INFINITY; n];
    dist[source] = 0.0;

    // Min-heap keyed by fixed-point distance: f64 is not Ord, so distances
    // are scaled by 1e9 into u64, and `Reverse` flips the max-heap.
    let mut heap: BinaryHeap<(Reverse<u64>, usize)> = BinaryHeap::new();
    heap.push((Reverse(0), source));

    while let Some((Reverse(d_scaled), u)) = heap.pop() {
        let d = d_scaled as f64 * 1e-9;
        // Skip stale entries already superseded by a shorter path.
        if d > dist[u] + 1e-12 {
            continue;
        }
        for &(v, w) in &adj[u] {
            let nd = dist[u] + w;
            if nd < dist[v] - 1e-12 {
                dist[v] = nd;
                heap.push((Reverse((nd * 1e9) as u64), v));
            }
        }
    }

    Ok(dist)
}

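/// BFS over CSR using `AtomicUsize` distances, with `compare_exchange`
/// claiming each vertex so it joins the next frontier exactly once.
///
/// The frontier expansion here is still sequential; the atomics make the
/// inner loop safe to parallelize (e.g. with rayon) without changing the
/// claiming logic. Invalid input degrades gracefully instead of erroring:
/// an undersized `row_ptr` yields an empty vector and an out-of-range
/// `source` yields all-`usize::MAX`.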
pub(crate) fn parallel_bfs_atomic(
    row_ptr: &[usize],
    col_idx: &[usize],
    source: usize,
) -> Vec<usize> {
    if row_ptr.len() < 2 {
        return vec![];
    }
    let n = row_ptr.len() - 1;
    if source >= n {
        return vec![usize::MAX; n];
    }

    let dist: Vec<AtomicUsize> = (0..n).map(|_| AtomicUsize::new(usize::MAX)).collect();
    dist[source].store(0, Ordering::Relaxed);

    let mut frontier = vec![source];

    while !frontier.is_empty() {
        let mut next = Vec::new();
        for &v in &frontier {
            let d_v = dist[v].load(Ordering::Relaxed);
            let start = row_ptr[v];
            let end = row_ptr[v + 1];
            for &nb in &col_idx[start..end] {
                // compare_exchange claims an unvisited vertex exactly once,
                // so duplicates never enter the next frontier.
                if dist[nb]
                    .compare_exchange(usize::MAX, d_v + 1, Ordering::AcqRel, Ordering::Relaxed)
                    .is_ok()
                {
                    next.push(nb);
                }
            }
        }
        frontier = next;
    }

    dist.into_iter()
        .map(|a| a.load(Ordering::Relaxed))
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

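    /// Builds an undirected CSR graph: each input edge is inserted in both
    /// directions before the offsets are accumulated.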
    fn build_csr(n: usize, edges: &[(usize, usize)]) -> (Vec<usize>, Vec<usize>) {
        let mut adj: Vec<Vec<usize>> = vec![vec![]; n];
        for &(u, v) in edges {
            adj[u].push(v);
            adj[v].push(u);
        }
        let mut row_ptr = vec![0usize; n + 1];
        for i in 0..n {
            row_ptr[i + 1] = row_ptr[i] + adj[i].len();
        }
        let col_idx: Vec<usize> = adj.into_iter().flatten().collect();
        (row_ptr, col_idx)
    }

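    /// Builds a directed, weighted CSR graph; edges are kept one-way.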
    fn build_csr_directed(
        n: usize,
        edges: &[(usize, usize, f64)],
    ) -> (Vec<usize>, Vec<usize>, Vec<f64>) {
        let mut adj: Vec<Vec<(usize, f64)>> = vec![vec![]; n];
        for &(u, v, w) in edges {
            adj[u].push((v, w));
        }
        let mut row_ptr = vec![0usize; n + 1];
        for i in 0..n {
            row_ptr[i + 1] = row_ptr[i] + adj[i].len();
        }
        let mut col_idx = Vec::new();
        let mut weights = Vec::new();
        for nbrs in adj {
            for (v, w) in nbrs {
                col_idx.push(v);
                weights.push(w);
            }
        }
        (row_ptr, col_idx, weights)
    }

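    /// Reference Dijkstra used to cross-check `gpu_sssp_delta_stepping`;
    /// it mirrors the same 1e-9 fixed-point heap keys.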
    fn dijkstra_ref(adj: &[Vec<(usize, f64)>], src: usize) -> Vec<f64> {
        let n = adj.len();
        let mut dist = vec![f64::INFINITY; n];
        dist[src] = 0.0;
        let mut heap: BinaryHeap<(Reverse<u64>, usize)> = BinaryHeap::new();
        heap.push((Reverse(0), src));
        while let Some((Reverse(d), u)) = heap.pop() {
            let d = d as f64 * 1e-9;
            if d > dist[u] + 1e-12 {
                continue;
            }
            for &(v, w) in &adj[u] {
                let nd = dist[u] + w;
                if nd < dist[v] - 1e-12 {
                    dist[v] = nd;
                    heap.push((Reverse((nd * 1e9) as u64), v));
                }
            }
        }
        dist
    }

    #[test]
    fn test_gpu_bfs_path_graph() {
        let (rp, ci) = build_csr(5, &[(0, 1), (1, 2), (2, 3), (3, 4)]);
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist, vec![0, 1, 2, 3, 4]);
    }

    #[test]
    fn test_gpu_bfs_connected() {
        let edges: Vec<(usize, usize)> = (0..5usize)
            .flat_map(|i| ((i + 1)..5).map(move |j| (i, j)))
            .collect();
        let (rp, ci) = build_csr(5, &edges);
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist[0], 0);
        for i in 1..5 {
            assert_eq!(dist[i], 1);
        }
    }

    #[test]
    fn test_gpu_bfs_disconnected() {
        let (rp, ci) = build_csr(4, &[(0, 1), (2, 3)]);
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist[0], 0);
        assert_eq!(dist[1], 1);
        assert_eq!(dist[2], usize::MAX);
        assert_eq!(dist[3], usize::MAX);
    }

    #[test]
    fn test_gpu_bfs_single_node() {
        let rp = vec![0usize, 0];
        let ci: Vec<usize> = vec![];
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist, vec![0]);
    }

    #[test]
    fn test_gpu_bfs_invalid_source() {
        let (rp, ci) = build_csr(4, &[(0, 1)]);
        assert!(gpu_bfs(&rp, &ci, 10, &GpuBfsConfig::default()).is_err());
    }

    #[test]
    fn test_gpu_bfs_tree() {
        let (rp, ci) = build_csr(7, &[(0, 1), (0, 2), (1, 3), (1, 4), (2, 5), (2, 6)]);
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist, vec![0, 1, 1, 2, 2, 2, 2]);
    }

    #[test]
    fn test_gpu_bfs_star_graph() {
        let edges: Vec<(usize, usize)> = (1..=5).map(|i| (0, i)).collect();
        let (rp, ci) = build_csr(6, &edges);
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist[0], 0);
        for i in 1..=5 {
            assert_eq!(dist[i], 1);
        }
    }

    #[test]
    fn test_gpu_bfs_cycle() {
        let (rp, ci) = build_csr(4, &[(0, 1), (1, 2), (2, 3), (3, 0)]);
        let dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        assert_eq!(dist[0], 0);
        assert_eq!(dist[1], 1);
        assert_eq!(dist[2], 2);
        assert_eq!(dist[3], 1);
    }

    #[test]
    fn test_gpu_sssp_shortest_paths() {
        let (rp, ci, w) = build_csr_directed(3, &[(0, 1, 1.0), (0, 2, 4.0), (1, 2, 2.0)]);
        let dist =
            gpu_sssp_bellman_ford(&rp, &ci, &w, 0, &GpuBfsConfig::default()).expect("sssp failed");
        assert!((dist[0] - 0.0).abs() < 1e-10);
        assert!((dist[1] - 1.0).abs() < 1e-10);
        assert!(
            (dist[2] - 3.0).abs() < 1e-10,
            "expected 3.0, got {}",
            dist[2]
        );
    }

    #[test]
    fn test_gpu_sssp_negative_weight_detection() {
        let (rp, ci, w) = build_csr_directed(3, &[(0, 1, 1.0), (1, 0, -2.0), (0, 2, 5.0)]);
        assert!(gpu_sssp_bellman_ford(&rp, &ci, &w, 0, &GpuBfsConfig::default()).is_err());
    }

    #[test]
    fn test_gpu_sssp_unreachable() {
        let (rp, ci, w) = build_csr_directed(3, &[(0, 1, 1.0)]);
        let dist =
            gpu_sssp_bellman_ford(&rp, &ci, &w, 0, &GpuBfsConfig::default()).expect("sssp failed");
        assert_eq!(dist[2], f64::INFINITY);
    }

    #[test]
    fn test_gpu_sssp_path_graph() {
        let (rp, ci, w) = build_csr_directed(4, &[(0, 1, 1.0), (1, 2, 1.0), (2, 3, 1.0)]);
        let dist =
            gpu_sssp_bellman_ford(&rp, &ci, &w, 0, &GpuBfsConfig::default()).expect("sssp failed");
        for i in 0..4usize {
            assert!(
                (dist[i] - i as f64).abs() < 1e-10,
                "dist[{}]={}",
                i,
                dist[i]
            );
        }
    }

    #[test]
    fn test_delta_stepping_matches_dijkstra() {
        let adj = vec![
            vec![(1usize, 2.0f64), (2, 6.0)],
            vec![(3usize, 1.0f64), (2, 3.0)],
            vec![(4usize, 1.0f64)],
            vec![(4usize, 5.0f64)],
            vec![],
        ];
        let delta_dist = gpu_sssp_delta_stepping(&adj, 0, 2.0, &GpuBfsConfig::default())
            .expect("delta stepping failed");
        let ref_dist = dijkstra_ref(&adj, 0);
        for i in 0..5 {
            if ref_dist[i] == f64::INFINITY {
                assert_eq!(delta_dist[i], f64::INFINITY);
            } else {
                assert!(
                    (ref_dist[i] - delta_dist[i]).abs() < 1e-9,
                    "node {}: ref={}, delta={}",
                    i,
                    ref_dist[i],
                    delta_dist[i]
                );
            }
        }
    }

    #[test]
    fn test_delta_stepping_negative_weight_error() {
        let adj = vec![vec![(1usize, -1.0f64)], vec![]];
        assert!(gpu_sssp_delta_stepping(&adj, 0, 1.0, &GpuBfsConfig::default()).is_err());
    }

    #[test]
    fn test_delta_stepping_invalid_source() {
        let adj = vec![vec![(1usize, 1.0f64)], vec![]];
        assert!(gpu_sssp_delta_stepping(&adj, 5, 1.0, &GpuBfsConfig::default()).is_err());
    }

    #[test]
    fn test_delta_stepping_invalid_delta() {
        let adj = vec![vec![(1usize, 1.0f64)], vec![]];
        assert!(gpu_sssp_delta_stepping(&adj, 0, -1.0, &GpuBfsConfig::default()).is_err());
        assert!(gpu_sssp_delta_stepping(&adj, 0, 0.0, &GpuBfsConfig::default()).is_err());
    }

    #[test]
    fn test_delta_stepping_disconnected() {
        let adj = vec![
            vec![(1usize, 1.0f64)],
            vec![],
            vec![(3usize, 2.0f64)],
            vec![],
        ];
        let dist = gpu_sssp_delta_stepping(&adj, 0, 1.0, &GpuBfsConfig::default()).expect("failed");
        assert!((dist[0] - 0.0).abs() < 1e-10);
        assert!((dist[1] - 1.0).abs() < 1e-10);
        assert_eq!(dist[2], f64::INFINITY);
        assert_eq!(dist[3], f64::INFINITY);
    }

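    // Added check (not in the original suite): Bellman-Ford should accept a
    // negative edge as long as no negative cycle is reachable.
    #[test]
    fn test_gpu_sssp_negative_edge_no_cycle() {
        let (rp, ci, w) = build_csr_directed(3, &[(0, 1, 2.0), (1, 2, -1.0)]);
        let dist =
            gpu_sssp_bellman_ford(&rp, &ci, &w, 0, &GpuBfsConfig::default()).expect("sssp failed");
        assert!((dist[1] - 2.0).abs() < 1e-10);
        assert!((dist[2] - 1.0).abs() < 1e-10);
    }
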
    #[test]
    fn test_parallel_bfs_atomic_matches_bfs() {
        let (rp, ci) = build_csr(5, &[(0, 1), (1, 2), (2, 3), (3, 4)]);
        let bfs_dist = gpu_bfs(&rp, &ci, 0, &GpuBfsConfig::default()).expect("bfs failed");
        let atomic_dist = parallel_bfs_atomic(&rp, &ci, 0);
        assert_eq!(bfs_dist, atomic_dist);
    }
}
587}