vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
// Bind group 0, binding 0: read-only storage buffer of 64-bit words to hash.
// Its runtime length (in words) is queried with arrayLength() by the entry point.
@group(0) @binding(0) var<storage, read> input_words: array<u64>;
// Bind group 0, binding 1: read-write storage buffer for the result.
// Only element 0 is written by the code in this file (the final 64-bit hash).
@group(0) @binding(1) var<storage, read_write> output_words: array<u64>;

// 64-bit multiplicative constants. Within this file P1 scales the input length
// into the initial accumulator, and P2/P3 drive the per-pair accumulator step;
// P4 and P5 are declared but not referenced by the code visible here.
// The five values match XXH_PRIME64_1..5 of the reference xxHash implementation.
const XXH3_P1: u64 = 0x9e3779b185ebca87u;
const XXH3_P2: u64 = 0xc2b2ae3d27d4eb4fu;
const XXH3_P3: u64 = 0x165667b19e3779f9u;
const XXH3_P4: u64 = 0x85ebca77c2b2ae63u;
const XXH3_P5: u64 = 0x27d4eb2f165667c5u;
// Twelve 64-bit key words mixed into the input by xxh3_mix. The entry point
// reads indices 7 and 8 for empty input, index 0 for the short path, the even
// indices (i % 12) inside the pair loop, and index 11 for the final mix.
// NOTE(review): these look like little-endian 64-bit reads of the first 96
// bytes of xxHash's default secret (XXH3_kSecret). Index 10 appears to have its
// 32-bit halves swapped relative to that table (expected 0x4ef90da297486471)
// — TODO confirm against the reference and against any CPU-side implementation
// that must produce identical hashes before changing it.
const XXH3_SECRET: array<u64, 12> = array<u64, 12>(
  0xbe4ba423396cfeb8u,0x1cad21f72c81017cu,0xdb979083e96dd4deu,0x1f67b3b7a4a44072u,
  0x78e5c0cc4ee679cbu,0x2172ffcc7dd05a82u,0x8e2443f7744608b8u,0x4c263a81e69035e0u,
  0xcb00c391bb52283cu,0xa32e531b8b65d088u,0x974864714ef90da2u,0xd8acdea946ef1938u);

// Rotates the 64-bit value `x` left by `n` bits.
// The right-shift amount is masked with 63 so that n == 0 shifts by 0 rather
// than by 64, making a zero rotation return `x` unchanged.
fn xxh3_rotl(x: u64, n: u32) -> u64 {
  let spill = (64u - n) & 63u;
  let upper = x << n;
  let lower = x >> spill;
  return upper | lower;
}
// Final bit-mixing step: xor-shift by 37, multiply, xor-shift by 32.
// Returns the scrambled 64-bit value; multiplication wraps modulo 2^64.
//
// The multiplier is deliberately a literal and NOT XXH3_P3: the two differ in
// one hex digit (0x16566791... here vs 0x165667b1... for XXH3_P3). This value
// equals PRIME_MX1 from the reference xxHash source, so do not "unify" them.
fn xxh3_avalanche(x_in: u64) -> u64 {
  let folded = x_in ^ (x_in >> 37u);
  let scrambled = folded * 0x165667919e3779f9u;
  return scrambled ^ (scrambled >> 32u);
}
// Combines two input words with one secret word into a single 64-bit value.
//   a, b    - the two data words to mix
//   secret  - key word; `a` is xored with it directly, `b` with it rotated
//             left by 31 bits
// The keyed words are multiplied half-by-half (low 32 bits with low 32 bits,
// high 32 bits with high 32 bits), the two products are xored, and both keyed
// words are then added in. All arithmetic wraps modulo 2^64.
fn xxh3_mix(a: u64, b: u64, secret: u64) -> u64 {
  let keyed_a = a ^ secret;
  let keyed_b = b ^ xxh3_rotl(secret, 31u);
  let low_product = (keyed_a & 0xffffffffu) * (keyed_b & 0xffffffffu);
  let high_product = (keyed_a >> 32u) * (keyed_b >> 32u);
  return (low_product ^ high_product) + keyed_a + keyed_b;
}

// Entry point: hashes `input_words` into a single 64-bit value stored in
// `output_words[0]`. The computation is serial; every invocation other than
// global id x == 0 returns immediately.
//
// Three paths, selected by the word count of `input_words`:
//   * 0 words    - avalanche of SECRET[7] ^ SECRET[8].
//   * 1..2 words - one mix of the first and last word plus a rotated xor.
//   * 3+ words   - words are consumed in pairs (a lone trailing word is paired
//                  with itself), then the first and last word are mixed in.
// The pair loop covers at most the first 24 words, so for longer inputs the
// words past index 23, other than the very last one, do not affect the result.
@compute @workgroup_size(1, 1, 1)
fn hash_xxhash3_64(@builtin(global_invocation_id) id: vec3<u32>) {
  if (id.x != 0u) { return; }

  let word_count = arrayLength(&input_words);

  // Empty input: the result depends only on the secret.
  if (word_count == 0u) {
    output_words[0u] = xxh3_avalanche(XXH3_SECRET[7u] ^ XXH3_SECRET[8u]);
    return;
  }

  // From here on there is at least one word, so both reads are in range.
  let head = input_words[0u];
  let tail = input_words[word_count - 1u];
  // The accumulator starts from the length so inputs of different sizes diverge.
  let seed = u64(word_count) * XXH3_P1;

  // Short input: a single mix of the two end words.
  if (word_count <= 2u) {
    let mixed = seed + xxh3_mix(head, tail, XXH3_SECRET[0u]);
    output_words[0u] = xxh3_avalanche(mixed + xxh3_rotl(head ^ tail, 49u));
    return;
  }

  // General case: fold in word pairs, capped at the first 24 words.
  var state = seed;
  let limit = min(word_count, 24u);
  for (var pos = 0u; pos < limit; pos = pos + 2u) {
    // Clamp the partner index so an odd trailing word pairs with itself.
    let partner = min(pos + 1u, word_count - 1u);
    let key = XXH3_SECRET[pos % 12u];
    state = state + xxh3_mix(input_words[pos], input_words[partner], key);
    state = xxh3_rotl(state, 31u) * XXH3_P2 + XXH3_P3;
  }

  // Fold in the end words once more with a dedicated secret word, then finalize.
  output_words[0u] = xxh3_avalanche(state + xxh3_mix(head, tail, XXH3_SECRET[11u]));
}