vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
// Message to hash, supplied as 32-bit words. hash_blake2s copies words from
// this buffer into its 16-word message block.
// NOTE(review): presumably the host packs message bytes little-endian into
// each word, as BLAKE2s specifies — confirm against the caller.
@group(0) @binding(0) var<storage, read> input_words: array<u32>;
// Digest destination. hash_blake2s writes elements 0..7 of this buffer.
@group(0) @binding(1) var<storage, read_write> output_words: array<u32>;

// BLAKE2s initialization vector (the eight IV words listed in RFC 7693).
// hash_blake2s uses it to seed the chaining value h and the upper half of the
// working vector, v[8..15].
const B2S_IV: array<u32, 8> = array<u32, 8>(0x6a09e667u,0xbb67ae85u,0x3c6ef372u,0xa54ff53au,0x510e527fu,0x9b05688cu,0x1f83d9abu,0x5be0cd19u);
// Message-word schedule: 10 rows of 16 indices, flattened into one array with
// two rows per source line. Round r reads entries [r * 16, r * 16 + 15]; each
// entry is an index into the 16-word message block.
const B2S_SIGMA: array<u32, 160> = array<u32, 160>(
  0u,1u,2u,3u,4u,5u,6u,7u,8u,9u,10u,11u,12u,13u,14u,15u, 14u,10u,4u,8u,9u,15u,13u,6u,1u,12u,0u,2u,11u,7u,5u,3u,
  11u,8u,12u,0u,5u,2u,15u,13u,10u,14u,3u,6u,7u,1u,9u,4u, 7u,9u,3u,1u,13u,12u,11u,14u,2u,6u,5u,10u,4u,0u,15u,8u,
  9u,0u,5u,7u,2u,4u,10u,15u,14u,1u,11u,12u,6u,8u,3u,13u, 2u,12u,6u,10u,0u,11u,8u,3u,4u,13u,7u,5u,15u,14u,1u,9u,
  12u,5u,1u,15u,14u,13u,4u,10u,0u,7u,6u,3u,9u,2u,8u,11u, 13u,11u,7u,14u,12u,1u,3u,9u,5u,0u,15u,4u,8u,6u,2u,10u,
  6u,15u,14u,9u,11u,3u,0u,8u,12u,2u,13u,7u,1u,4u,10u,5u, 10u,2u,8u,4u,7u,6u,1u,5u,15u,11u,9u,14u,3u,12u,13u,0u);

// Rotates the 32-bit word `x` right by `n` bits and returns the result.
// The bits shifted out on the right are re-inserted on the left.
fn b2s_rotr(x: u32, n: u32) -> u32 {
  // Complementary left-shift amount; the mask maps the n == 0 case (32) to 0
  // so the shift count always stays within 0..31.
  let wrap_shift = (32u - n) & 31u;
  let kept_bits = x >> n;
  let wrapped_bits = x << wrap_shift;
  return kept_bits | wrapped_bits;
}
// One application of the BLAKE2s mixing function G.
//
// Takes the 16-word working vector by value, mixes the four words selected by
// indices `a`, `b`, `c`, `d` with the two message words `x` and `y`, and
// returns the updated copy. The caller's vector is not modified.
// All additions are u32 and wrap modulo 2^32.
fn b2s_g(v_in: array<u32, 16>, a: u32, b: u32, c: u32, d: u32, x: u32, y: u32) -> array<u32, 16> {
  var state = v_in;

  // First half: fold in message word x, rotations 16 and 12.
  state[a] = state[a] + state[b] + x;
  state[d] = b2s_rotr(state[d] ^ state[a], 16u);
  state[c] = state[c] + state[d];
  state[b] = b2s_rotr(state[b] ^ state[c], 12u);

  // Second half: fold in message word y, rotations 8 and 7.
  state[a] = state[a] + state[b] + y;
  state[d] = b2s_rotr(state[d] ^ state[a], 8u);
  state[c] = state[c] + state[d];
  state[b] = b2s_rotr(state[b] ^ state[c], 7u);

  return state;
}

// Computes the unkeyed BLAKE2s-256 digest of `input_words` and stores the
// eight digest words in `output_words[0..7]`.
//
// The message length is taken to be arrayLength(&input_words) * 4 bytes, so
// the message is always a whole number of 32-bit words.
//
// The whole message is hashed: it is consumed in 16-word (64-byte) blocks,
// the final block is zero-padded, and only the final block is compressed with
// the finalization flag set. Messages of 16 words or fewer take exactly one
// compression, as a single final block.
//
// Hashing is sequential, so only invocation x == 0 does any work; every other
// invocation returns immediately. Very large inputs are therefore processed
// by a single GPU thread.
@compute @workgroup_size(1, 1, 1)
fn hash_blake2s(@builtin(global_invocation_id) id: vec3<u32>) {
  if (id.x != 0u) { return; }

  let total_words = arrayLength(&input_words);
  // Number of 16-word blocks, rounded up without risking u32 overflow.
  // A zero-length message still needs one (all-zero) final block.
  let partial_block = select(0u, 1u, (total_words % 16u) != 0u);
  let block_count = max(total_words / 16u + partial_block, 1u);

  // Chaining value: IV xor parameter block word 0
  // (digest length 32, key length 0, fanout 1, depth 1).
  var h: array<u32, 8>;
  for (var i = 0u; i < 8u; i = i + 1u) { h[i] = B2S_IV[i]; }
  h[0u] = h[0u] ^ 0x01010020u;

  for (var blk = 0u; blk < block_count; blk = blk + 1u) {
    let base = blk * 16u;
    let is_last = (blk + 1u) == block_count;
    // Words consumed up to and including this block. Every non-final block
    // is full, so base + 16 never exceeds total_words there.
    let consumed = select(base + 16u, total_words, is_last);
    let block_words = consumed - base;

    // Load the message block, zero-padding past the end of the input.
    var m: array<u32, 16>;
    for (var i = 0u; i < 16u; i = i + 1u) {
      if (i < block_words) {
        m[i] = input_words[base + i];
      } else {
        m[i] = 0u;
      }
    }

    // Working vector: chaining value in the low half, IV in the high half.
    var v: array<u32, 16>;
    for (var i = 0u; i < 8u; i = i + 1u) { v[i] = h[i]; v[i + 8u] = B2S_IV[i]; }
    // 64-bit byte counter t = consumed * 4, split into low and high words.
    v[12u] = v[12u] ^ (consumed << 2u);
    v[13u] = v[13u] ^ (consumed >> 30u);
    // Finalization flag: invert v[14] for the last block only.
    if (is_last) { v[14u] = ~v[14u]; }

    // Ten rounds; each round mixes the four columns, then the four diagonals,
    // with message words chosen by that round's row of B2S_SIGMA.
    for (var r = 0u; r < 10u; r = r + 1u) {
      let o = r * 16u;
      v = b2s_g(v,0u,4u,8u,12u,m[B2S_SIGMA[o]],m[B2S_SIGMA[o+1u]]);
      v = b2s_g(v,1u,5u,9u,13u,m[B2S_SIGMA[o+2u]],m[B2S_SIGMA[o+3u]]);
      v = b2s_g(v,2u,6u,10u,14u,m[B2S_SIGMA[o+4u]],m[B2S_SIGMA[o+5u]]);
      v = b2s_g(v,3u,7u,11u,15u,m[B2S_SIGMA[o+6u]],m[B2S_SIGMA[o+7u]]);
      v = b2s_g(v,0u,5u,10u,15u,m[B2S_SIGMA[o+8u]],m[B2S_SIGMA[o+9u]]);
      v = b2s_g(v,1u,6u,11u,12u,m[B2S_SIGMA[o+10u]],m[B2S_SIGMA[o+11u]]);
      v = b2s_g(v,2u,7u,8u,13u,m[B2S_SIGMA[o+12u]],m[B2S_SIGMA[o+13u]]);
      v = b2s_g(v,3u,4u,9u,14u,m[B2S_SIGMA[o+14u]],m[B2S_SIGMA[o+15u]]);
    }

    // Feed-forward: fold both halves of the working vector into the chain.
    for (var i = 0u; i < 8u; i = i + 1u) { h[i] = h[i] ^ v[i] ^ v[i + 8u]; }
  }

  for (var i = 0u; i < 8u; i = i + 1u) { output_words[i] = h[i]; }
}