use bytemuck::{Pod, Zeroable};
use crate::context::Context;
use super::accumulation::Accumulation;
/// Per-pass parameters uploaded to the denoise shader's uniform binding.
///
/// The struct is `repr(C)` and `Pod`, so the field order below is the exact
/// byte layout written into the uniform buffer; do not reorder fields.
/// It consists of eight 4-byte fields (32 bytes total).
#[repr(C)]
#[derive(Copy, Clone, Debug, Pod, Zeroable)]
struct DenoiseUniforms {
    /// Image width, taken from the accumulation target.
    width: u32,
    /// Image height, taken from the accumulation target.
    height: u32,
    /// Sample spacing for this pass; filled with a power of two per iteration.
    step: i32,
    /// Boolean flag (0/1); set to 1 on the first pass only.
    demodulate: u32,
    /// Boolean flag (0/1); set to 1 on the last pass only.
    remodulate: u32,
    /// Normal-weight sigma handed to the shader.
    sigma_normal: f32,
    /// Luminance-weight sigma handed to the shader.
    sigma_luminance: f32,
    /// Explicit trailing padding; always written as 0.0.
    /// NOTE(review): presumably keeps the size a multiple of 16 bytes for
    /// uniform-buffer layout rules — confirm against `denoise.wgsl`.
    _pad0: f32,
}
/// Per-pixel size multiplier used when sizing the scratch buffers
/// (`width * height * PIXEL_SIZE`).
/// NOTE(review): presumably bytes per pixel (one `vec4<f32>`) — confirm
/// against the shader's storage layout.
const PIXEL_SIZE: u64 = 16;

/// Value uploaded as `sigma_normal` in every pass's uniforms.
const SIGMA_NORMAL: f32 = 64.0;

/// Value uploaded as `sigma_luminance` in every pass's uniforms.
const SIGMA_LUMINANCE: f32 = 4.0;
/// GPU resources built for one specific (width, height, iterations)
/// combination, reused across frames while that combination is unchanged.
struct Cache {
    /// Image width the resources were built for.
    width: u32,
    /// Image height the resources were built for.
    height: u32,
    /// Number of filter passes the resources were built for.
    iterations: usize,
    /// Ping-pong targets: pass `i` writes into `scratch[i % 2]`.
    scratch: [wgpu::Buffer; 2],
    /// One uniform buffer per pass. Stored alongside the bind groups that
    /// reference them; nothing reads this field after construction.
    _uniforms: Vec<wgpu::Buffer>,
    /// One bind group per pass, in execution order.
    bind_groups: Vec<wgpu::BindGroup>,
}
/// Multi-pass compute denoiser that filters an accumulation buffer into a
/// scratch buffer.
pub struct Denoise {
    /// Compute pipeline built from the bundled `denoise.wgsl` shader.
    pipeline: wgpu::ComputePipeline,
    /// Layout shared by every per-pass bind group.
    bind_group_layout: wgpu::BindGroupLayout,
    /// Size- and iteration-dependent resources; `None` until the first run.
    cache: Option<Cache>,
}
impl Denoise {
pub fn new() -> Denoise {
let ctxt = Context::get();
let bind_group_layout = ctxt.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("rt_denoise_bind_group_layout"),
entries: &[
storage_entry(0, true), storage_entry(1, false), storage_entry(2, true), uniform_entry(3),
],
});
let pipeline_layout = ctxt.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: Some("rt_denoise_pipeline_layout"),
bind_group_layouts: &[Some(&bind_group_layout)],
immediate_size: 0,
});
let shader = ctxt.create_shader_module(
Some("rt_denoise_shader"),
&crate::builtin::compile_shader_with_common(
"package::denoise",
include_str!("../../builtin/raytrace/denoise.wgsl"),
),
);
let pipeline = ctxt
.device
.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
label: Some("rt_denoise_pipeline"),
layout: Some(&pipeline_layout),
module: &shader,
entry_point: Some("main"),
compilation_options: Default::default(),
cache: None,
});
Denoise {
pipeline,
bind_group_layout,
cache: None,
}
}
fn make_scratch(width: u32, height: u32, label: &str) -> wgpu::Buffer {
let count = (width.max(1) as u64) * (height.max(1) as u64);
Context::get().create_buffer_simple(
Some(label),
count * PIXEL_SIZE,
wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
)
}
fn ensure_cache(&mut self, accum: &Accumulation, iterations: usize) {
let width = accum.width;
let height = accum.height;
if let Some(c) = &self.cache {
if c.width == width && c.height == height && c.iterations == iterations {
return;
}
}
let ctxt = Context::get();
let scratch = [
Self::make_scratch(width, height, "rt_denoise_scratch0"),
Self::make_scratch(width, height, "rt_denoise_scratch1"),
];
let mut uniforms = Vec::with_capacity(iterations);
let mut bind_groups = Vec::with_capacity(iterations);
for i in 0..iterations {
let uniform = ctxt.create_buffer_simple(
Some("rt_denoise_uniform"),
std::mem::size_of::<DenoiseUniforms>() as u64,
wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
);
ctxt.write_buffer(
&uniform,
0,
bytemuck::bytes_of(&DenoiseUniforms {
width,
height,
step: 1i32 << i as u32,
demodulate: (i == 0) as u32,
remodulate: (i == iterations - 1) as u32,
sigma_normal: SIGMA_NORMAL,
sigma_luminance: SIGMA_LUMINANCE,
_pad0: 0.0,
}),
);
let src = if i == 0 {
&accum.buffer
} else {
&scratch[(i - 1) % 2]
};
let dst = &scratch[i % 2];
bind_groups.push(ctxt.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("rt_denoise_bind_group"),
layout: &self.bind_group_layout,
entries: &[
wgpu::BindGroupEntry {
binding: 0,
resource: src.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 1,
resource: dst.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 2,
resource: accum.buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 3,
resource: uniform.as_entire_binding(),
},
],
}));
uniforms.push(uniform);
}
self.cache = Some(Cache {
width,
height,
iterations,
scratch,
_uniforms: uniforms,
bind_groups,
});
}
pub fn run<'a>(
&'a mut self,
encoder: &mut wgpu::CommandEncoder,
accum: &Accumulation,
iterations: u32,
gpu: &mut crate::renderer::timings::GpuTimer,
) -> &'a wgpu::Buffer {
let iterations = iterations.max(1) as usize;
self.ensure_cache(accum, iterations);
let cache = self.cache.as_ref().expect("cache just ensured");
let groups_x = accum.width.div_ceil(8);
let groups_y = accum.height.div_ceil(8);
for bind_group in &cache.bind_groups {
let denoise_ts = gpu.compute_scope("denoise");
let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
label: Some("rt_denoise_pass"),
timestamp_writes: denoise_ts,
});
pass.set_pipeline(&self.pipeline);
pass.set_bind_group(0, bind_group, &[]);
pass.dispatch_workgroups(groups_x, groups_y, 1);
}
&self.cache.as_ref().expect("cache ensured").scratch[(iterations - 1) % 2]
}
}
/// Builds a compute-stage layout entry for a uniform buffer at `binding`,
/// with no dynamic offset and no minimum binding size.
fn uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
    let buffer_ty = wgpu::BindingType::Buffer {
        ty: wgpu::BufferBindingType::Uniform,
        has_dynamic_offset: false,
        min_binding_size: None,
    };
    wgpu::BindGroupLayoutEntry {
        binding,
        visibility: wgpu::ShaderStages::COMPUTE,
        ty: buffer_ty,
        count: None,
    }
}
/// Builds a compute-stage layout entry for a storage buffer at `binding`.
///
/// `read_only` selects whether the shader may only read the buffer (`true`)
/// or may also write to it (`false`). No dynamic offset and no minimum
/// binding size are set.
fn storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
    let buffer_ty = wgpu::BindingType::Buffer {
        ty: wgpu::BufferBindingType::Storage { read_only },
        has_dynamic_offset: false,
        min_binding_size: None,
    };
    wgpu::BindGroupLayoutEntry {
        binding,
        visibility: wgpu::ShaderStages::COMPUTE,
        ty: buffer_ty,
        count: None,
    }
}