// Simplified GPU BFS - Level-Synchronous (easier to implement than frontier-based)
//
// Algorithm:
// 1. Initialize all distances to u32::MAX except source (0)
// 2. For each level, process all nodes at current distance
// 3. Update unvisited neighbors to distance + 1
// 4. Repeat until no updates
struct BfsParams {
num_nodes: u32,
current_level: u32,
source_node: u32,
_padding: u32,
}
@group(0) @binding(0) var<uniform> params: BfsParams;
@group(0) @binding(1) var<storage, read> row_offsets: array<u32>;
@group(0) @binding(2) var<storage, read> col_indices: array<u32>;
@group(0) @binding(3) var<storage, read_write> distances: array<atomic<u32>>;
@group(0) @binding(4) var<storage, read_write> updated: atomic<u32>;
// Process one node per thread
@compute @workgroup_size(256)
fn bfs_level(@builtin(global_invocation_id) global_id: vec3<u32>) {
let node_id = global_id.x;
// Bounds check
if (node_id >= params.num_nodes) {
return;
}
// Only process nodes at current level
let dist = atomicLoad(&distances[node_id]);
if (dist != params.current_level) {
return;
}
// Process all neighbors
let start = row_offsets[node_id];
let end = row_offsets[node_id + 1u];
for (var i = start; i < end; i++) {
let neighbor = col_indices[i];
// Try to update neighbor's distance (atomic compare-exchange)
let old_dist = atomicMin(&distances[neighbor], params.current_level + 1u);
// If we updated a neighbor, mark that work was done
if (old_dist > params.current_level + 1u) {
atomicStore(&updated, 1u);
}
}
}