image_convolution/pipeline.rs

use crate::gpu_device::*;
use crate::{Image, Kernel, Real};

pub struct Pipeline {
    pub device: GpuDevice,
    encoder: wgpu::CommandEncoder,
}

impl Pipeline {
    pub fn new() -> Self {
        let device = create_gpu_device();
        let encoder = device
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
        Pipeline { device, encoder }
    }

    pub fn chain(
        &mut self,
        input_buffer: &wgpu::Buffer,
        kernel: &Kernel,
        image_size: (u32, u32),
    ) -> (wgpu::Buffer, (u32, u32)) {
        let (width, height) = image_size;
        // A "valid" convolution shrinks each dimension by kernel.size - 1.
        let crop = kernel.size - 1;
        let output = Image {
            data: Vec::new(),
            width: width - crop,
            height: height - crop,
        };
        let output_size = (output.size() * std::mem::size_of::<Real>() as u32) as u64;
        let result_buffer = self.device.create_buffer("result", output_size);
        let kernel_buffer = self
            .device
            .create_data_buffer("kernel", bytemuck::cast_slice(&kernel.data));
        let params = [width, kernel.size];
        let params_data = bytemuck::cast_slice(&params);
        let params_buffer = self.device.create_uniform_buffer("params", params_data);

        let (bind_group, compute_pipeline) = self.device.create_compute_pipeline(
            &[
                (
                    &input_buffer,
                    4,
                    wgpu::BufferBindingType::Storage { read_only: true },
                ),
                (
                    &result_buffer,
                    4,
                    wgpu::BufferBindingType::Storage { read_only: false },
                ),
                (
                    &kernel_buffer,
                    4,
                    wgpu::BufferBindingType::Storage { read_only: true },
                ),
                (
                    &params_buffer,
                    params_data.len() as u64,
                    wgpu::BufferBindingType::Uniform,
                ),
            ],
            include_str!("convolution.wgsl"),
        );

        let mut cpass = self
            .encoder
            .begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
        cpass.set_bind_group(0, &bind_group, &[]);
        cpass.set_pipeline(&compute_pipeline);
        // One invocation per output pixel; the pass is only recorded here and
        // submitted later, in run().
        cpass.dispatch(output.width, output.height, 1);

        (result_buffer, (output.width, output.height))
    }

    pub async fn run<T: bytemuck::Pod>(
        mut self,
        output_buffers: &[(&wgpu::Buffer, (u32, u32), u32)],
    ) -> Vec<Vec<T>> {
        // Pack every requested result buffer back to back into a single
        // map-readable output buffer.
        let mut output_offset_sizes = Vec::with_capacity(output_buffers.len());
        let mut offset = 0;
        for (result, image_size, pixel_size) in output_buffers {
            let size = (image_size.0 * image_size.1 * pixel_size) as u64;
            output_offset_sizes.push((result, offset, size));
            offset += size;
        }
        let output_buffer = self.device.create_output_buffer("output", offset);
        for (result, offset, size) in output_offset_sizes {
            self.encoder
                .copy_buffer_to_buffer(result, 0, &output_buffer, offset, size);
        }
        self.device.queue.submit(Some(self.encoder.finish()));

        let buffer_slice = output_buffer.slice(..);
        let buffer_future = buffer_slice.map_async(wgpu::MapMode::Read);
        self.device.device.poll(wgpu::Maintain::Wait);

        if let Ok(()) = buffer_future.await {
            let data = buffer_slice.get_mapped_range();
            let mut output = bytemuck::cast_slice::<u8, T>(&data).to_vec();

            drop(data);
            output_buffer.unmap();

            // Split the flat Vec<T> back into one Vec per requested image.
            let mut outputs = Vec::with_capacity(output_buffers.len());
            for (_, image_size, _) in output_buffers {
                let size = (image_size.0 * image_size.1) as usize;
                let remained_data = output.split_off(size);
                outputs.push(output);
                output = remained_data;
            }
            outputs
        } else {
            panic!("failed to run compute on gpu!")
        }
    }
}
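
A minimal driver sketch for the pipeline above. It assumes the crate is named image_convolution, that it exposes Pipeline, Kernel, and Real, and that Real is a bytemuck::Pod scalar such as f32; none of that driver code appears in the listing itself. It uploads a source image, chains two convolution passes, and reads the final pixels back.

// Usage sketch (assumed driver code, not part of pipeline.rs).
use image_convolution::{Kernel, Pipeline, Real};

async fn convolve_twice(
    input: &[Real],
    width: u32,
    height: u32,
    kernel: &Kernel,
) -> (Vec<Real>, (u32, u32)) {
    let mut pipeline = Pipeline::new();
    // Upload the source image with the same helper chain() uses for the
    // kernel weights, so it can be bound as a read-only storage buffer.
    let input_buffer = pipeline
        .device
        .create_data_buffer("input", bytemuck::cast_slice(input));
    // Each chain() call crops the image by kernel.size - 1 in both dimensions
    // and returns the GPU buffer plus the new size for the next pass.
    let (pass1, size1) = pipeline.chain(&input_buffer, kernel, (width, height));
    let (pass2, size2) = pipeline.chain(&pass1, kernel, size1);
    // run() consumes the pipeline, submits the recorded passes, and maps the
    // requested buffers back to host memory.
    let pixel_size = std::mem::size_of::<Real>() as u32;
    let mut outputs = pipeline.run::<Real>(&[(&pass2, size2, pixel_size)]).await;
    (outputs.pop().unwrap(), size2)
}

Driving the async function requires whatever executor the rest of the crate uses (for example a block_on helper); the sketch only shows the call sequence against the Pipeline API.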