// WGSL lowering for `workgroup.visitor` — a workgroup-scoped mark
// and emit visitor backed by a shared bitmap and a bounded emit
// buffer. The invariant is "every first-visit writes one emit
// slot; subsequent visits are no-ops". CAS-style atomic ORs over
// the visited bitmap guarantee at-most-once emission across lanes.
//
// Commands:
// op = 1 → visit(node) → result.status = 0 (first) | 1 (already),
// result.node = node
// op = 2 → emitted_len() → result.len = current emit_buffer length
// op = 3 → reset() → clears the bitmap and the emit buffer
//
// Status words:
// 0 = first_visit, 1 = already_visited, 2 = out_of_range.
const VISITOR_UNIVERSE: u32 = 256u;
const VISITOR_EMIT_CAPACITY: u32 = 256u;
const VISITOR_OP_VISIT: u32 = 1u;
const VISITOR_OP_LEN: u32 = 2u;
const VISITOR_OP_RESET: u32 = 3u;
const VISITOR_FIRST_VISIT: u32 = 0u;
const VISITOR_ALREADY_VISITED: u32 = 1u;
const VISITOR_OUT_OF_RANGE: u32 = 2u;
struct VisitorCommand {
op: u32,
node: u32,
reserved: u32,
lane: u32,
}
struct VisitorResult {
status: u32,
node: u32,
emitted_len: u32,
reserved: u32,
}
@group(0) @binding(0) var<storage, read> visitor_commands: array<VisitorCommand>;
@group(0) @binding(1) var<storage, read_write> visitor_results: array<VisitorResult>;
@group(0) @binding(2) var<storage, read_write> visitor_emit_out: array<u32>;
var<workgroup> visitor_bitmap: array<atomic<u32>, 8>; // 256 bits.
var<workgroup> visitor_emit_len: atomic<u32>;
fn bit_mask(node: u32) -> u32 {
return 1u << (node % 32u);
}
fn bit_word(node: u32) -> u32 {
return node / 32u;
}
@compute @workgroup_size(64, 1, 1)
fn workgroup_visitor_visit(@builtin(local_invocation_id) local_id: vec3<u32>) {
let lane = local_id.x;
if (lane == 0u) {
for (var i: u32 = 0u; i < 8u; i = i + 1u) {
atomicStore(&visitor_bitmap[i], 0u);
}
atomicStore(&visitor_emit_len, 0u);
}
workgroupBarrier();
let command_count = arrayLength(&visitor_commands);
var command_index = 0u;
loop {
if (command_index >= command_count) {
break;
}
if (visitor_commands[command_index].lane == lane) {
let cmd = visitor_commands[command_index];
var result: VisitorResult;
result.status = 0u;
result.node = cmd.node;
result.emitted_len = 0u;
result.reserved = 0u;
switch (cmd.op) {
case 1u: { // visit
if (cmd.node >= VISITOR_UNIVERSE) {
result.status = VISITOR_OUT_OF_RANGE;
} else {
let mask = bit_mask(cmd.node);
let word = bit_word(cmd.node);
let prev = atomicOr(&visitor_bitmap[word], mask);
if ((prev & mask) == 0u) {
let slot = atomicAdd(&visitor_emit_len, 1u);
if (slot < VISITOR_EMIT_CAPACITY) {
visitor_emit_out[slot] = cmd.node;
}
result.status = VISITOR_FIRST_VISIT;
} else {
result.status = VISITOR_ALREADY_VISITED;
}
}
}
case 2u: { // emitted_len
result.emitted_len = atomicLoad(&visitor_emit_len);
}
case 3u: { // reset
// Clear the visited bitmap.
for (var i: u32 = 0u; i < 8u; i = i + 1u) {
atomicStore(&visitor_bitmap[i], 0u);
}
// Audit fix (workgroup-primitives C3): zero the
// emit buffer contents, not just the length
// counter. Prior behavior left prior-dispatch
// node ids visible to any host that maps the
// visitor_emit_out binding directly, leaking
// info across dispatch boundaries. CPU
// WorkgroupVisitor::reset() already clears the
// Vec; the shader must match for byte-identical
// parity.
for (var i: u32 = 0u; i < VISITOR_EMIT_CAPACITY; i = i + 1u) {
visitor_emit_out[i] = 0u;
}
atomicStore(&visitor_emit_len, 0u);
}
default: {}
}
visitor_results[command_index] = result;
}
workgroupBarrier();
command_index = command_index + 1u;
}
}