// WGSL lowering for `workgroup.string_interner` — a bounded FNV-1a
// string interner resident in workgroup SRAM.
//
// The interner keeps two parallel tables in shared memory:
//
// var<workgroup> interner_slots: array<SlotEntry, 256>;
// var<workgroup> interner_bytes: array<atomic<u32>, 512>;
//
// Each slot stores `(hash, byte_offset, byte_len)`. A symbol is the
// slot index. Intern commands first FNV-1a-hash their payload, probe
// the slot table linearly (open-addressing) until they find either a
// matching slot (existing symbol) or an empty slot (new symbol).
//
// Commands:
// op = 1 → intern(bytes_offset, bytes_len) → result.symbol = id, status
// op = 2 → lookup(symbol) → result.bytes_offset/len, status
// op = 3 → len() → result.len = slot count
//
// Status words:
// 0 = ok, 1 = out_of_slots, 2 = out_of_bytes, 3 = unknown_symbol.
//
// The FNV-1a prime and offset basis are hard-coded to keep CPU/GPU
// parity byte-identical.
const INTERNER_SLOT_CAPACITY: u32 = 256u;
const INTERNER_BYTE_CAPACITY: u32 = 2048u;
const INTERNER_BYTE_WORD_CAPACITY: u32 = 512u;
const INTERNER_WORKGROUP_SIZE: u32 = 64u;
const FNV_OFFSET_BASIS: u32 = 0x811C9DC5u;
const FNV_PRIME: u32 = 0x01000193u;
const INTERNER_OP_INTERN: u32 = 1u;
const INTERNER_OP_LOOKUP: u32 = 2u;
const INTERNER_OP_LEN: u32 = 3u;
const INTERNER_OK: u32 = 0u;
const INTERNER_OUT_OF_SLOTS: u32 = 1u;
const INTERNER_OUT_OF_BYTES: u32 = 2u;
const INTERNER_UNKNOWN_SYMBOL: u32 = 3u;
struct InternerCommand {
op: u32,
bytes_offset: u32,
bytes_len: u32,
symbol: u32,
}
struct InternerResult {
status: u32,
symbol: u32,
bytes_offset: u32,
bytes_len: u32,
}
@group(0) @binding(0) var<storage, read> interner_commands: array<InternerCommand>;
@group(0) @binding(1) var<storage, read> interner_source_bytes: array<u32>;
@group(0) @binding(2) var<storage, read_write> interner_results: array<InternerResult>;
struct SlotEntry {
hash: u32,
byte_offset: u32,
byte_len: u32,
reserved: u32,
}
var<workgroup> interner_slots: array<SlotEntry, 256>;
var<workgroup> interner_bytes: array<atomic<u32>, 512>;
var<workgroup> interner_slot_len: atomic<u32>;
var<workgroup> interner_byte_len: atomic<u32>;
fn read_source_byte(offset: u32) -> u32 {
let word_idx = offset / 4u;
let byte_idx = offset % 4u;
return (interner_source_bytes[word_idx] >> (byte_idx * 8u)) & 0xFFu;
}
fn fnv1a(offset: u32, len: u32) -> u32 {
var hash: u32 = FNV_OFFSET_BASIS;
for (var i: u32 = 0u; i < len; i = i + 1u) {
hash = hash ^ read_source_byte(offset + i);
hash = hash * FNV_PRIME;
}
return hash;
}
fn compare_existing(offset_a: u32, len_a: u32, offset_b: u32, len_b: u32) -> bool {
if (len_a != len_b) {
return false;
}
for (var i: u32 = 0u; i < len_a; i = i + 1u) {
let lhs_word = atomicLoad(&interner_bytes[(offset_a + i) / 4u]);
let lhs = (lhs_word >> (((offset_a + i) % 4u) * 8u)) & 0xFFu;
let rhs = read_source_byte(offset_b + i);
if (lhs != rhs) {
return false;
}
}
return true;
}
@compute @workgroup_size(64, 1, 1)
fn workgroup_string_interner_intern(@builtin(local_invocation_id) local_id: vec3<u32>) {
let lane = local_id.x;
if (lane == 0u) {
atomicStore(&interner_slot_len, 0u);
atomicStore(&interner_byte_len, 0u);
}
for (var word_idx = lane; word_idx < INTERNER_BYTE_WORD_CAPACITY; word_idx = word_idx + INTERNER_WORKGROUP_SIZE) {
atomicStore(&interner_bytes[word_idx], 0u);
}
workgroupBarrier();
let command_count = arrayLength(&interner_commands);
var command_index = 0u;
loop {
if (command_index >= command_count) {
break;
}
if (interner_commands[command_index].symbol == lane) {
let cmd = interner_commands[command_index];
var result: InternerResult;
result.status = INTERNER_OK;
result.symbol = 0u;
result.bytes_offset = 0u;
result.bytes_len = 0u;
switch (cmd.op) {
case 1u: { // intern
let hash = fnv1a(cmd.bytes_offset, cmd.bytes_len);
let slot_count = atomicLoad(&interner_slot_len);
var found: u32 = 0xFFFFFFFFu;
for (var i: u32 = 0u; i < slot_count; i = i + 1u) {
if (interner_slots[i].hash == hash
&& compare_existing(
interner_slots[i].byte_offset,
interner_slots[i].byte_len,
cmd.bytes_offset,
cmd.bytes_len,
)) {
found = i;
break;
}
}
if (found != 0xFFFFFFFFu) {
result.symbol = found;
} else if (slot_count >= INTERNER_SLOT_CAPACITY) {
result.status = INTERNER_OUT_OF_SLOTS;
} else {
let byte_len = atomicLoad(&interner_byte_len);
if (byte_len + cmd.bytes_len > INTERNER_BYTE_CAPACITY) {
result.status = INTERNER_OUT_OF_BYTES;
} else {
for (var i: u32 = 0u; i < cmd.bytes_len; i = i + 1u) {
let dst = byte_len + i;
let src = read_source_byte(cmd.bytes_offset + i);
let word_idx = dst / 4u;
let shift = (dst % 4u) * 8u;
atomicOr(&interner_bytes[word_idx], src << shift);
}
interner_slots[slot_count].hash = hash;
interner_slots[slot_count].byte_offset = byte_len;
interner_slots[slot_count].byte_len = cmd.bytes_len;
atomicStore(&interner_byte_len, byte_len + cmd.bytes_len);
atomicStore(&interner_slot_len, slot_count + 1u);
result.symbol = slot_count;
}
}
}
case 2u: { // lookup
let slot_count = atomicLoad(&interner_slot_len);
if (cmd.symbol >= slot_count) {
result.status = INTERNER_UNKNOWN_SYMBOL;
} else {
result.symbol = cmd.symbol;
result.bytes_offset = interner_slots[cmd.symbol].byte_offset;
result.bytes_len = interner_slots[cmd.symbol].byte_len;
}
}
case 3u: { // len
result.symbol = atomicLoad(&interner_slot_len);
}
default: {}
}
interner_results[command_index] = result;
}
workgroupBarrier();
command_index = command_index + 1u;
}
}