1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
/// for starting your computing mission on gpu
/// first you need to write your kernel code 
/// in wgsl (recommended) or any other shader codes 
/// which wgpu supports , and then create compute_kernel variable
/// with that code .
///
/// x , y , z fields can be used to specify how many 
/// blocks of gpu is needed for your work
///
/// amounts of threads used = @workgroup_size (which you specify in your shader code) * x * y * z
#[derive(Debug , Clone)]
pub struct compute_kernel{
    /// sets number of GPU blocks in x direction
    pub x : u32 ,
    /// sets number of GPU blocks in y direction
    pub y : u32 , 
    /// sets number of GPU blocks in z direction
    pub z : u32 ,
    /// the default entry point is set to main function , so your kernel code in wgsl must 
    /// contain main function
    pub code : String,
}
impl compute_kernel{
    /// with your kernel code creates compute_kernel with x y and z set to 1
    fn new(code : String) -> Self{
        compute_kernel{
            x : 1,
            y : 1,
            z : 1,
            code : code,
        }
    }
}
/// with info struct you pass data to 
/// gpu side , for now set bind and group to the
/// same value !
/// think of it as id of your variable in wgsl side , 
/// wgpu uses it to find out where to copy data to in gpu 
/// side
///
/// in data field you should use vec! of your data 
/// the rest of variable types are not tested yet
#[derive(Debug , Clone)]
pub struct info<T>{
    /// sets binding index of variable in your kernel code (for now it must be same as your group)
    pub bind : u32,
    /// sets group index of variable in your kernel code (for now it must be same as your binding)
    pub group : u32,
    /// the data which you want grant access to GPU for rw
    pub data : T,
}
/// when you use compute! macro to start 
/// your computing , the default compute_config will 
/// be generated . but for customizing it you can 
/// create your own compute_config and pass it directly to 
/// compute_ext! macro as its first arg . _entry_point is set to main by default
/// change that to use another function for entry point of your kernel program
#[derive(Debug)]
pub struct compute_config{
    /// your custom wgpu instance
    pub _wgpu_instance : wgpu::Instance,
    /// your custom wgpu adapter
    pub _wgpu_adapter : wgpu::Adapter,
    /// your custom wgpu queue
    pub _wgpu_queue : wgpu::Queue,
    /// your custom wgpu device
    pub _wgpu_device : wgpu::Device,
    /// by default it will be set to main function in your wgsl kernel code
    pub _entry_point : String,
}
/// the default configuration tries to work on most of the devices 
impl Default for compute_config{
    /// it is used by compute macro to set defaults
    fn default() -> Self {
        let instance = wgpu::Instance::default();
        let adapter = pollster::block_on(instance
                .request_adapter(&wgpu::RequestAdapterOptions::default()))
                .expect("ERROR : failed to get adapter");
        let (device, queue) = pollster::block_on(adapter
                .request_device(
                    &wgpu::DeviceDescriptor {
                        label: None,
                        required_features: wgpu::Features::empty(),
                        required_limits: wgpu::Limits::downlevel_defaults(),
                        memory_hints: wgpu::MemoryHints::MemoryUsage,
                    },
                    None,
                ))
                .expect("ERROR : Adapter could not find the device");
        Self {
            _wgpu_instance : instance ,
            _wgpu_adapter : adapter ,
            _wgpu_queue : queue ,
            _wgpu_device : device ,
            _entry_point : "main".to_string() ,
        }
    }
}
/// if you want to do have customized config for wgpu
/// create compute_config and pass it as first arg 
/// to this macro
#[macro_export]
macro_rules! compute_ext {
    ($config:expr , $kernel:expr, $($data:expr),*) => {
        {
            use wgpu::util::DeviceExt;
            let instance = $config._wgpu_instance;
            let adapter = $config._wgpu_adapter;
            let device = $config._wgpu_device;
            let queue = $config._wgpu_queue;
            
            let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
                label: Some("Shader"),
                source: wgpu::ShaderSource::Wgsl($kernel.code.into()),
            });
            let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
                label: None,
                layout: None,
                module: &shader,
                entry_point: &$config._entry_point ,
                compilation_options: Default::default(),
                cache: None,
            });
            let mut encoder =
                device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
            let mut staging_buffers : Vec<wgpu::Buffer> = Vec::new();
            let mut sizes : Vec<wgpu::BufferAddress> = Vec::new();
            let mut storage_buffers : Vec<wgpu::Buffer> = Vec::new();
            {
                let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                    label: None,
                    timestamp_writes: None,
                });
                $(
                    let refr = $data.data.as_slice();
                    let size = std::mem::size_of_val(refr) as wgpu::BufferAddress;
                    sizes.push(size);
                    let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
                        label: None,
                        size : sizes[sizes.len() - 1],
                        usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
                        mapped_at_creation: false,
                    });
                    staging_buffers.push(staging_buffer);
                    let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
                        label: Some("Storage Buffer"),
                        contents: bytemuck::cast_slice(refr),
                        usage: wgpu::BufferUsages::STORAGE
                            | wgpu::BufferUsages::COPY_DST
                            | wgpu::BufferUsages::COPY_SRC,
                    });
                    storage_buffers.push(storage_buffer);
                    let bind_group_layout = compute_pipeline.get_bind_group_layout($data.group);
                    let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
                        label: None,
                        layout: &bind_group_layout,
                        entries: &[wgpu::BindGroupEntry {
                            binding: $data.bind,
                            resource: storage_buffers[storage_buffers.len() - 1].as_entire_binding(),
                        }],
                    });
                    cpass.set_pipeline(&compute_pipeline);
                    cpass.set_bind_group($data.group, &bind_group, &[]);
                    )*
                        cpass.insert_debug_marker("debug_marker");
                    cpass.dispatch_workgroups($kernel.x, $kernel.y, $kernel.z); 
            }
            for (index, storage_buffer) in storage_buffers.iter().enumerate() {
                encoder.copy_buffer_to_buffer(&storage_buffer, 0, &staging_buffers[index], 0, sizes[index]);
            }
            queue.submit(Some(encoder.finish()));
            let mut index = 0;
            $(
                let buffer_slice = staging_buffers[index].slice(..);
                let (sender, receiver) = flume::bounded(1);
                buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
                device.poll(wgpu::Maintain::wait()).panic_on_timeout();
                if let Ok(Ok(())) = pollster::block_on(receiver.recv_async()) {
                    let data = buffer_slice.get_mapped_range();
                    $data.data = bytemuck::cast_slice(&data).to_vec();
                    drop(data);
                    staging_buffers[index].unmap(); 
                } else {
                    panic!("failed to run compute on gpu!")
                }
                index += 1;
                )*
        }
    };
}
/// compute macro is used to start your computing
/// compute!(compute_kernel , &mut info , ...)
///
/// compute macro starts the computing and when it finished
/// it will change the data fields to new data which gpu did set
/// to them , this way you can get results of the computing
#[macro_export]
macro_rules! compute {
    ($kernel:expr, $($data:expr),*) => {
        let config = core_compute::compute_config::default(); 
        core_compute::compute_ext!(config , $kernel, $($data),*);   
    };
}