vk-video 0.3.0

A library for hardware video coding using Vulkan Video, with wgpu integration.
Documentation
@group(0) @binding(0) var source_y: texture_storage_2d<r8unorm, read>;
@group(0) @binding(1) var source_uv: texture_storage_2d<rg8unorm, read>;

@group(1) @binding(0) var dest_y: binding_array<texture_storage_2d<r8unorm, write>, 8>;
@group(2) @binding(0) var dest_uv: binding_array<texture_storage_2d<rg8unorm, write>, 8>;

struct Immediates {
  output_number: u32,
  input_width: u32,
  input_height: u32,
  scaling: array<u32, 8>,
}

var<immediate> imm: Immediates;

const PI: f32 = 3.14159265358979323846;

fn sinc(x: f32) -> f32 {
  if abs(x) < 1e-6 {
    return 1.0;
  }
  let px = PI * x;
  return sin(px) / px;
}

fn lanczos3_weight(x: f32) -> f32 {
  if abs(x) >= 3.0 {
    return 0.0;
  }
  return sinc(x) * sinc(x / 3.0);
}

fn sample_nearest_y(float_coords: vec2<f32>, input_size: vec2<u32>) -> vec4<f32> {
  let coords = vec2<u32>(vec2<f32>(input_size) * float_coords);
  return textureLoad(source_y, coords);
}

fn sample_bilinear_y(float_coords: vec2<f32>, input_size: vec2<u32>) -> vec4<f32> {
  let fc = vec2<f32>(input_size) * float_coords - 0.5;
  let x0 = u32(max(floor(fc.x), 0.0));
  let y0 = u32(max(floor(fc.y), 0.0));
  let x1 = min(x0 + 1, input_size.x - 1);
  let y1 = min(y0 + 1, input_size.y - 1);
  let fx = fc.x - floor(fc.x);
  let fy = fc.y - floor(fc.y);

  let p00 = textureLoad(source_y, vec2(x0, y0)).r;
  let p10 = textureLoad(source_y, vec2(x1, y0)).r;
  let p01 = textureLoad(source_y, vec2(x0, y1)).r;
  let p11 = textureLoad(source_y, vec2(x1, y1)).r;

  let val = mix(mix(p00, p10, fx), mix(p01, p11, fx), fy);
  return vec4(val, 0.0, 0.0, 1.0);
}

fn sample_lanczos3_y(float_coords: vec2<f32>, input_size: vec2<u32>) -> vec4<f32> {
  let fc = vec2<f32>(input_size) * float_coords - 0.5;
  let center_x = floor(fc.x);
  let center_y = floor(fc.y);
  let max_x = i32(input_size.x) - 1;
  let max_y = i32(input_size.y) - 1;

  var sum = 0.0;
  var weight_sum = 0.0;
  for (var dy = -2; dy <= 3; dy++) {
    let sy = clamp(i32(center_y) + dy, 0, max_y);
    let wy = lanczos3_weight(fc.y - (center_y + f32(dy)));
    for (var dx = -2; dx <= 3; dx++) {
      let sx = clamp(i32(center_x) + dx, 0, max_x);
      let wx = lanczos3_weight(fc.x - (center_x + f32(dx)));
      let w = wx * wy;
      sum += textureLoad(source_y, vec2(u32(sx), u32(sy))).r * w;
      weight_sum += w;
    }
  }

  let val = sum / weight_sum;
  return vec4(val, 0.0, 0.0, 1.0);
}

fn sample_nearest_uv(float_coords: vec2<f32>, input_uv_size: vec2<u32>) -> vec4<f32> {
  let coords = vec2<u32>(vec2<f32>(input_uv_size) * float_coords);
  return textureLoad(source_uv, coords);
}

fn sample_bilinear_uv(float_coords: vec2<f32>, input_uv_size: vec2<u32>) -> vec4<f32> {
  let fc = vec2<f32>(input_uv_size) * float_coords - 0.5;
  let x0 = u32(max(floor(fc.x), 0.0));
  let y0 = u32(max(floor(fc.y), 0.0));
  let x1 = min(x0 + 1, input_uv_size.x - 1);
  let y1 = min(y0 + 1, input_uv_size.y - 1);
  let fx = fc.x - floor(fc.x);
  let fy = fc.y - floor(fc.y);

  let p00 = textureLoad(source_uv, vec2(x0, y0));
  let p10 = textureLoad(source_uv, vec2(x1, y0));
  let p01 = textureLoad(source_uv, vec2(x0, y1));
  let p11 = textureLoad(source_uv, vec2(x1, y1));

  let val = mix(mix(p00, p10, vec4(fx)), mix(p01, p11, vec4(fx)), vec4(fy));
  return val;
}

fn sample_lanczos3_uv(float_coords: vec2<f32>, input_uv_size: vec2<u32>) -> vec4<f32> {
  let fc = vec2<f32>(input_uv_size) * float_coords - 0.5;
  let center_x = floor(fc.x);
  let center_y = floor(fc.y);
  let max_x = i32(input_uv_size.x) - 1;
  let max_y = i32(input_uv_size.y) - 1;

  var sum = vec4(0.0);
  var weight_sum = 0.0;
  for (var dy = -2; dy <= 3; dy++) {
    let sy = clamp(i32(center_y) + dy, 0, max_y);
    let wy = lanczos3_weight(fc.y - (center_y + f32(dy)));
    for (var dx = -2; dx <= 3; dx++) {
      let sx = clamp(i32(center_x) + dx, 0, max_x);
      let wx = lanczos3_weight(fc.x - (center_x + f32(dx)));
      let w = wx * wy;
      sum += textureLoad(source_uv, vec2(u32(sx), u32(sy))) * w;
      weight_sum += w;
    }
  }

  return sum / weight_sum;
}

@compute
@workgroup_size(256)
fn main(@builtin(global_invocation_id) id: vec3<u32>) {
  var remaining_offset: u32 = id.x;
  var i: u32 = 0;
  for ( ; i < imm.output_number; i++ ) {
    let size = textureDimensions(dest_y[i]);
    let total_size = (size.x * size.y + 255) / 256 * 256;
    if (remaining_offset < total_size) {
      break;
    }

    remaining_offset -= total_size;
  }

  if i >= imm.output_number {
    return;
  }

  let size = textureDimensions(dest_y[i]);
  let y = remaining_offset / size.x;
  let x = remaining_offset % size.x;
  let coords_output = vec2(x, y);

  if y >= size.y {
    return;
  }

  let float_coords = (vec2<f32>(coords_output) + 0.5) / vec2<f32>(size);
  let input_size = vec2(imm.input_width, imm.input_height);
  let algo = imm.scaling[i];

  var output_y: vec4<f32>;
  if algo == 2u {
    output_y = sample_lanczos3_y(float_coords, input_size);
  } else if algo == 1u {
    output_y = sample_bilinear_y(float_coords, input_size);
  } else {
    output_y = sample_nearest_y(float_coords, input_size);
  }
  textureStore(dest_y[i], coords_output, output_y);

  if (x % 2 == 0 && y % 2 == 0) {
    let input_uv_size = input_size / 2;
    let uv_coords_output = coords_output / 2;
    let uv_size = size / 2;
    let float_coords_uv = (vec2<f32>(uv_coords_output) + 0.5) / vec2<f32>(uv_size);
    var output_uv: vec4<f32>;
    if algo == 2u {
      output_uv = sample_lanczos3_uv(float_coords_uv, input_uv_size);
    } else if algo == 1u {
      output_uv = sample_bilinear_uv(float_coords_uv, input_uv_size);
    } else {
      output_uv = sample_nearest_uv(float_coords_uv, input_uv_size);
    }
    textureStore(dest_uv[i], uv_coords_output, output_uv);
  }
}