image_convolution/pipeline.rs

1use crate::gpu_device::*;
2use crate::{Image, Kernel, Real};
3
/// Records a sequence of GPU convolution passes into one command encoder
/// and owns the device they will execute on; `run` submits everything.
pub struct Pipeline {
    // Device + queue handles used for buffer creation and submission.
    pub device: GpuDevice,
    // Accumulates the compute passes recorded by `chain` until `run`
    // finishes and submits the encoder.
    encoder: wgpu::CommandEncoder,
}
8
9impl Pipeline {
10    pub fn new() -> Self {
11        let device = create_gpu_device();
12        let encoder = device
13            .device
14            .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
15        Pipeline { device, encoder }
16    }
17
18    pub fn chain(
19        &mut self,
20        input_buffer: &wgpu::Buffer,
21        kernel: &Kernel,
22        image_size: (u32, u32),
23    ) -> (wgpu::Buffer, (u32, u32)) {
24        let (width, height) = image_size;
25        let crop = kernel.size - 1;
26        let output = Image {
27            data: Vec::new(),
28            width: width - crop,
29            height: height - crop,
30        };
31        let output_size = (output.size() * std::mem::size_of::<Real>() as u32) as u64;
32        let result_buffer = self.device.create_buffer("result", output_size);
33        let kernel_buffer = self
34            .device
35            .create_data_buffer("kernel", bytemuck::cast_slice(&kernel.data));
36        let params = [width, kernel.size];
37        let params_data = bytemuck::cast_slice(&params);
38        let params_buffer = self.device.create_uniform_buffer("params", params_data);
39
40        // create bind group and compute pipeline
41        let (bind_group, compute_pipeline) = self.device.create_compute_pipeline(
42            &[
43                (
44                    &input_buffer,
45                    4,
46                    wgpu::BufferBindingType::Storage { read_only: true },
47                ),
48                (
49                    &result_buffer,
50                    4,
51                    wgpu::BufferBindingType::Storage { read_only: false },
52                ),
53                (
54                    &kernel_buffer,
55                    4,
56                    wgpu::BufferBindingType::Storage { read_only: true },
57                ),
58                (
59                    &params_buffer,
60                    params_data.len() as u64,
61                    wgpu::BufferBindingType::Uniform,
62                ),
63            ],
64            include_str!("convolution.wgsl"),
65        );
66
67        let mut cpass = self
68            .encoder
69            .begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
70        cpass.set_bind_group(0, &bind_group, &[]);
71        cpass.set_pipeline(&compute_pipeline);
72        cpass.dispatch(output.width, output.height, 1);
73
74        (result_buffer, (output.width, output.height))
75    }
76
77    pub async fn run<T: bytemuck::Pod>(
78        mut self,
79        output_buffers: &[(&wgpu::Buffer, (u32, u32), u32)],
80    ) -> Vec<Vec<T>> {
81        let mut output_offset_sizes = Vec::with_capacity(output_buffers.len());
82        let mut offset = 0;
83        for (result, image_size, pixel_size) in output_buffers {
84            let size = (image_size.0 * image_size.1 * pixel_size) as u64;
85            output_offset_sizes.push((result, offset, size));
86            offset += size;
87        }
88        let output_buffer = self.device.create_output_buffer("output", offset);
89        for (result, offset, size) in output_offset_sizes {
90            self.encoder
91                .copy_buffer_to_buffer(result, 0, &output_buffer, offset, size);
92        }
93        self.device.queue.submit(Some(self.encoder.finish()));
94
95        // Read output
96        let buffer_slice = output_buffer.slice(..);
97        let buffer_future = buffer_slice.map_async(wgpu::MapMode::Read);
98        self.device.device.poll(wgpu::Maintain::Wait);
99
100        // Awaits until `buffer_future` can be read from
101        if let Ok(()) = buffer_future.await {
102            let data = buffer_slice.get_mapped_range();
103            let mut output = bytemuck::cast_slice::<u8, T>(&data).to_vec();
104
105            // We have to make sure all mapped views are dropped before we unmap the buffer.
106            drop(data);
107            output_buffer.unmap();
108
109            let mut outputs = Vec::with_capacity(output_buffers.len());
110            for (_, image_size, _) in output_buffers {
111                let size = (image_size.0 * image_size.1) as usize;
112                let remained_data = output.split_off(size);
113                outputs.push(output);
114                output = remained_data;
115            }
116            outputs
117        } else {
118            panic!("failed to run compute on gpu!")
119        }
120    }
121}