1use std::os::raw::c_char;
78use std::ffi::CStr;
79
80use wgpu::util::DeviceExt;
81
82use std::sync::{Arc, Mutex};
83
84use core::ops::Range;
85
86mod configuration;
87pub use configuration::
88{GPUComputingBackend ,
89 GPUPowerSettings ,
90 GPUSpeedSettings ,
91 GPUMemorySettings ,
92 GPUComputingConfig};
93
94mod util;
95use util::{cchar_as_string};
96
97mod gpu_device;
98pub use gpu_device::
99{GPUDeviceType ,
100 GPUDeviceInfo ,
101 GPUDevices ,
102 get_computing_gpu_infos ,
103 free_gpu_devices_infos};
104
105
106
/// Per-kernel GPU state cached by `register_computing_kernel_code`:
/// one compiled compute pipeline for a single shader entry point.
struct GPUDeviceCollection {
    compute_pipeline : Arc<wgpu::ComputePipeline> ,
}
110
/// One GPU context created by `create_computing_gpu_resources`:
/// the wgpu device/queue pair plus the kernels registered against it.
struct GPUCollection {
    device : Arc<wgpu::Device> ,
    queue : Arc<wgpu::Queue> ,
    // Lazily created on first `register_computing_kernel_code` call;
    // `None` until then (and again after `free_compute_kernel_codes`).
    res : Option<Arc<Mutex<Vec<GPUDeviceCollection>>>> ,
}
116
117
// Global registry of every GPU context created through this FFI layer,
// indexed by the `usize` handles returned to C callers.
// NOTE(review): access to this `static mut` is unsynchronized; this assumes
// the C side never calls these functions from multiple threads at once — confirm.
static mut GPU_RES_KEEPER : Option<Arc<Mutex<Vec<GPUCollection>>>> = None;
119
120
121#[no_mangle]
122pub extern "C" fn create_computing_gpu_resources(config : GPUComputingConfig , customize : GPUCustomSettings) -> usize {
127
128 let backend = match config.backend {
129 GPUComputingBackend::vulkan => {
130 wgpu::Backends::VULKAN
131 },
132 GPUComputingBackend::opengl => {
133 wgpu::Backends::GL
134 },
135 GPUComputingBackend::all => {
136 wgpu::Backends::all()
137 },
138 GPUComputingBackend::default_backend => {
139 wgpu::Backends::default()
140 },
141 GPUComputingBackend::metal => {
142 wgpu::Backends::METAL
143 },
144 GPUComputingBackend::direct_x12 => {
145 wgpu::Backends::DX12
146 },
147 GPUComputingBackend::highest_support => {
148 wgpu::Backends::PRIMARY
149 },
150 GPUComputingBackend::lowest_support => {
151 wgpu::Backends::SECONDARY
152 },
153 GPUComputingBackend::webgpu => {
154 wgpu::Backends::BROWSER_WEBGPU
155 },
156 };
157
158 let instance = wgpu::Instance::new(wgpu::InstanceDescriptor{
159 backends : backend ,
160 ..Default::default()
161 });
162
163
164 let mut adapter : wgpu::Adapter;
165
166 if config.gpu_index_in_backend_group < 0 {
167 let adapt = pollster::block_on(instance
168 .request_adapter(&wgpu::RequestAdapterOptions{
169 power_preference : match config.power {
170 GPUPowerSettings::none => {
171 wgpu::PowerPreference::None
172 },
173 GPUPowerSettings::LowPower => {
174 wgpu::PowerPreference::LowPower
175 },
176 GPUPowerSettings::HighPerformance => {
177 wgpu::PowerPreference::HighPerformance
178 },
179 },
180 ..Default::default()
181 }))
182 .expect("ERROR : could not allocate gpu resources which match your configs");
183 adapter = adapt;
184
185 }else {
186 let mut adapters = instance.enumerate_adapters(backend);
187 adapter = adapters.swap_remove(config.gpu_index_in_backend_group as usize);
188 }
189
190
191 let (device, queue) = pollster::block_on(adapter
192 .request_device(
193 &wgpu::DeviceDescriptor {
194 label: None,
195 required_features: wgpu::Features::empty(),
196 required_limits: match config.speed {
197 GPUSpeedSettings::lowest_speed => {
198 wgpu::Limits::downlevel_webgl2_defaults()
199 },
200 GPUSpeedSettings::low_speed => {
201 wgpu::Limits::downlevel_defaults()
202 },
203 GPUSpeedSettings::custom_speed => {
204
205 customize.gpu_speed_custom.to_gpu_limits()
206 },
207 GPUSpeedSettings::default_speed => {
208 wgpu::Limits::default()
209 },
210 },
211 memory_hints: match config.memory {
212 GPUMemorySettings::prefer_performance => {
213 wgpu::MemoryHints::Performance
214 },
215 GPUMemorySettings::prefer_memory => {
216 wgpu::MemoryHints::MemoryUsage
217 },
218 GPUMemorySettings::custom_memory => {
219
220
221 wgpu::MemoryHints::Manual{
222 suballocated_device_memory_block_size : customize.gpu_memory_custom.to_rs_range(),
223 }
224 },
225 },
226 },
227 None,
228 ))
229 .expect("ERROR : could not allocate gpu resources which match your configs");
230
231 unsafe{
233 let device = Arc::new(device);
234 let queue = Arc::new(queue);
235
236 if let None = &GPU_RES_KEEPER {
237 GPU_RES_KEEPER = Some(Arc::new(Mutex::new(Vec::new())));
238 }
239
240 let arci = GPU_RES_KEEPER.clone().unwrap();
241 let mut GPU_Data = arci.lock().unwrap();
242
243 let setting_cache_index = GPU_Data.len();
244 GPU_Data.push(GPUCollection{
245 device : Arc::clone(&device) ,
246 queue : Arc::clone(&queue) ,
247 res : None ,
248 });
249
250 return setting_cache_index;
251 }
252}
253
254#[no_mangle]
255pub extern "C" fn register_computing_kernel_code(gpu_res_index : usize , code : *const c_char , entry_point : *const c_char) -> usize {
259 unsafe {
260 match &GPU_RES_KEEPER {
261 None => {
262 panic!("ERROR : use create_gpu_resources function first to add and get index of your config !");
263 },
264 Some(arci) => {
265 let mut gpu_data = arci.lock().unwrap();
266 if gpu_data.len() <= gpu_res_index {
267 panic!("ERROR : invalid gpu_res_index provided for register_kernel_code function , please use the number which you received from create_gpu_resources function");
268 }
269
270 let shader = gpu_data[gpu_res_index].device.create_shader_module(wgpu::ShaderModuleDescriptor {
271 label: Some("Shader"),
272 source: wgpu::ShaderSource::Wgsl(cchar_as_string(code).expect("ERROR : No computing kernel code provided , code field is not set .").into()),
273 });
274
275
276
277 let compute_pipeline = gpu_data[gpu_res_index].device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
278 label: None,
279 layout: None,
280 module: &shader,
281 entry_point: &cchar_as_string(entry_point).expect("ERROR : No code_entry_point field is set , it must be name of function which your kernel code starts from") ,
282 compilation_options: Default::default(),
283 cache: None,
284 });
285
286 let arc_compute_pipe = Arc::new(compute_pipeline);
287
288 match &gpu_data[gpu_res_index].res {
289 None => {
290 gpu_data[gpu_res_index].res = Some(Arc::new(Mutex::new(Vec::new())));
291 let arci = gpu_data[gpu_res_index].res.clone().unwrap();
292 let mut gpu_device_res = arci.lock().unwrap();
293 let index = gpu_device_res.len();
294 gpu_device_res.push(GPUDeviceCollection{
295 compute_pipeline : Arc::clone(&arc_compute_pipe) ,
296 });
297
298 return index;
299 },
300 Some(arci) => {
301 let mut gpu_device_res = arci.lock().unwrap();
302 let index = gpu_device_res.len();
303 gpu_device_res.push(GPUDeviceCollection{
304 compute_pipeline : Arc::clone(&arc_compute_pipe) ,
305 });
306
307 return index;
308 }
309 }
310 }
311 }
312 }
313}
314
315#[no_mangle]
316pub extern "C" fn free_compute_kernel_codes(gpu_res_index : usize){
320 unsafe {
321 match &GPU_RES_KEEPER {
322 None => return ,
323 Some(arci) => {
324 let mut gpu_data = arci.lock().unwrap();
325 if gpu_data.len() < gpu_res_index {
326 return;
327 }else{
328 gpu_data[gpu_res_index].res = None;
329 return;
330 }
331 }
332 }
333 }
334}
335
336
337
/// C-facing description of one dispatch: the workgroup counts passed to
/// `dispatch_workgroups`, plus the registry handles identifying which GPU
/// context and which compiled kernel to run.
#[repr(C)]
#[derive(Debug, Clone)]
pub struct CKernel {
    // Workgroup counts along each dispatch dimension.
    pub x : u32 ,
    pub y : u32 ,
    pub z : u32 ,
    // Index returned by `register_computing_kernel_code`.
    pub kernel_code_index : usize ,
    // Index returned by `create_computing_gpu_resources`.
    pub config_index : usize ,
}
361
362#[no_mangle]
363pub extern "C" fn set_kernel_default_config(kernel: *mut CKernel) -> usize{
372 if kernel.is_null() {
374 panic!("ERROR : NULL value provided for set_kernel_default_config");
375 }
376
377 unsafe {
378
379 let kernel = &mut *kernel;
380
381
382 let config = GPUComputingConfig {
383 backend: GPUComputingBackend::opengl,
384 power: GPUPowerSettings::HighPerformance,
385 speed: GPUSpeedSettings::low_speed,
386 memory: GPUMemorySettings::prefer_memory,
387 gpu_index_in_backend_group : -1,
388 };
389
390 let customize = GPUCustomSettings::default();
391
392 let index = create_computing_gpu_resources(config , customize);
393
394 kernel.config_index = index;
395
396 return index;
397 }
398}
399
400
401impl CKernel {
402 fn get_real_config(&self) -> (Arc<wgpu::Device> , Arc<wgpu::Queue> , Arc<wgpu::ComputePipeline>) {
405 unsafe{
406 match &GPU_RES_KEEPER {
407 None => {
408 panic!("ERROR : before using compute function you must use create_gpu_resources and register_kernel_code");
409 },
410 Some(arci) => {
411 let gpu_data = arci.lock().unwrap();
412 if gpu_data.len() <= self.config_index {
413 panic!("ERROR : invalid config_index used for CKernel arg");
414 }
415 if let Some(arcii) = &gpu_data[self.config_index].res {
416 let gpu_device_data = arcii.lock().unwrap();
417 if gpu_device_data.len() <= self.kernel_code_index {
418 panic!("ERROR : invalid kernel_code_index used for CKernel arg");
419 }
420
421 (Arc::clone(&gpu_data[self.config_index].device) , Arc::clone(&gpu_data[self.config_index].queue) , Arc::clone(&gpu_device_data[self.kernel_code_index].compute_pipeline))
422 }else{
423 panic!("ERROR : before using compute function you must register_kernel_code");
424 }
425 },
426 }
427 }
428 }
429}
430
/// One shader binding supplied from C: a byte buffer plus the binding slot
/// it occupies inside its bind group.
#[repr(C)]
#[derive(Debug, Clone)]
pub struct DataBinder {
    // Binding index within the bind group (`@binding(n)` in WGSL).
    pub bind: u32,
    // Length in bytes of the buffer behind `data`.
    pub data_len: usize,
    // Pointer to the caller's buffer pointer. `compute` frees the old buffer
    // and rewrites `*data` to point at a new allocation holding the result.
    pub data: *mut *mut u8,
}
450
/// Extra parameters consumed only when the matching `custom_*` variant is
/// selected in `GPUComputingConfig` (custom_speed / custom_memory).
#[repr(C)]
#[derive(Debug, Clone , Default)]
pub struct GPUCustomSettings {
    // Used when speed == GPUSpeedSettings::custom_speed.
    pub gpu_speed_custom : GPUSpeedCustom ,
    // Used when memory == GPUMemorySettings::custom_memory.
    pub gpu_memory_custom : GPUMemoryCustom ,
}
460
/// C-facing byte range for wgpu's manual memory hints; converted to a Rust
/// `Range<u64>` by `to_rs_range`.
#[repr(C)]
#[derive(Debug, Clone , Default)]
pub struct GPUMemoryCustom {
    // Inclusive lower bound (range start), in bytes.
    pub min : u64 ,
    // Exclusive upper bound (range end), in bytes.
    pub max : u64 ,
}
471
472impl GPUMemoryCustom{
473 fn to_rs_range(&self) -> Range<u64> {
474 std::ops::Range{start : self.min , end : self.max}
475 }
476}
477
/// C-facing mirror of `wgpu::Limits`, selected via
/// `GPUSpeedSettings::custom_speed` and converted field-for-field by
/// `to_gpu_limits`. See the wgpu documentation for each limit's meaning.
#[repr(C)]
#[derive(Debug, Clone , Default)]
pub struct GPUSpeedCustom {
    pub max_texture_dimension_1d: u32,
    pub max_texture_dimension_2d: u32,
    pub max_texture_dimension_3d: u32,
    pub max_texture_array_layers: u32,
    pub max_bind_groups: u32,
    pub max_bindings_per_bind_group: u32,
    pub max_dynamic_uniform_buffers_per_pipeline_layout: u32,
    pub max_dynamic_storage_buffers_per_pipeline_layout: u32,
    pub max_sampled_textures_per_shader_stage: u32,
    pub max_samplers_per_shader_stage: u32,
    pub max_storage_buffers_per_shader_stage: u32,
    pub max_storage_textures_per_shader_stage: u32,
    pub max_uniform_buffers_per_shader_stage: u32,
    pub max_uniform_buffer_binding_size: u32,
    pub max_storage_buffer_binding_size: u32,
    pub max_vertex_buffers: u32,
    pub max_buffer_size: u64,
    pub max_vertex_attributes: u32,
    pub max_vertex_buffer_array_stride: u32,
    pub min_uniform_buffer_offset_alignment: u32,
    pub min_storage_buffer_offset_alignment: u32,
    pub max_inter_stage_shader_components: u32,
    pub max_color_attachments: u32,
    pub max_color_attachment_bytes_per_sample: u32,
    pub max_compute_workgroup_storage_size: u32,
    pub max_compute_invocations_per_workgroup: u32,
    pub max_compute_workgroup_size_x: u32,
    pub max_compute_workgroup_size_y: u32,
    pub max_compute_workgroup_size_z: u32,
    pub max_compute_workgroups_per_dimension: u32,
    pub min_subgroup_size: u32,
    pub max_subgroup_size: u32,
    pub max_push_constant_size: u32,
    pub max_non_sampler_bindings: u32,
}
518
impl GPUSpeedCustom {
    /// Converts these C-supplied values into a `wgpu::Limits`, copying every
    /// field one-for-one with no validation or clamping.
    fn to_gpu_limits(&self) -> wgpu::Limits {
        wgpu::Limits {
            max_texture_dimension_1d: self.max_texture_dimension_1d,
            max_texture_dimension_2d: self.max_texture_dimension_2d,
            max_texture_dimension_3d: self.max_texture_dimension_3d,
            max_texture_array_layers: self.max_texture_array_layers,
            max_bind_groups: self.max_bind_groups,
            max_bindings_per_bind_group: self.max_bindings_per_bind_group,
            max_dynamic_uniform_buffers_per_pipeline_layout: self.max_dynamic_uniform_buffers_per_pipeline_layout,
            max_dynamic_storage_buffers_per_pipeline_layout: self.max_dynamic_storage_buffers_per_pipeline_layout,
            max_sampled_textures_per_shader_stage: self.max_sampled_textures_per_shader_stage,
            max_samplers_per_shader_stage: self.max_samplers_per_shader_stage,
            max_storage_buffers_per_shader_stage: self.max_storage_buffers_per_shader_stage,
            max_storage_textures_per_shader_stage: self.max_storage_textures_per_shader_stage,
            max_uniform_buffers_per_shader_stage: self.max_uniform_buffers_per_shader_stage,
            max_uniform_buffer_binding_size: self.max_uniform_buffer_binding_size,
            max_storage_buffer_binding_size: self.max_storage_buffer_binding_size,
            max_vertex_buffers: self.max_vertex_buffers,
            max_buffer_size: self.max_buffer_size,
            max_vertex_attributes: self.max_vertex_attributes,
            max_vertex_buffer_array_stride: self.max_vertex_buffer_array_stride,
            min_uniform_buffer_offset_alignment: self.min_uniform_buffer_offset_alignment,
            min_storage_buffer_offset_alignment: self.min_storage_buffer_offset_alignment,
            max_inter_stage_shader_components: self.max_inter_stage_shader_components,
            max_color_attachments: self.max_color_attachments,
            max_color_attachment_bytes_per_sample: self.max_color_attachment_bytes_per_sample,
            max_compute_workgroup_storage_size: self.max_compute_workgroup_storage_size,
            max_compute_invocations_per_workgroup: self.max_compute_invocations_per_workgroup,
            max_compute_workgroup_size_x: self.max_compute_workgroup_size_x,
            max_compute_workgroup_size_y: self.max_compute_workgroup_size_y,
            max_compute_workgroup_size_z: self.max_compute_workgroup_size_z,
            max_compute_workgroups_per_dimension: self.max_compute_workgroups_per_dimension,
            min_subgroup_size: self.min_subgroup_size,
            max_subgroup_size: self.max_subgroup_size,
            max_push_constant_size: self.max_push_constant_size,
            max_non_sampler_bindings: self.max_non_sampler_bindings,
        }
    }
}
559
/// One bind group's worth of data supplied from C: the group index plus a
/// C array of `DataBinder` entries.
#[repr(C)]
#[derive(Debug, Clone)]
pub struct GroupOfBinders {
    // Bind group index (`@group(n)` in WGSL).
    pub group : u32 ,
    // Pointer to the first of `datas_len` DataBinder elements.
    pub datas : *mut DataBinder ,
    pub datas_len : usize ,
}
590
591
592
593#[no_mangle]
594pub extern "C" fn compute(kernel : *mut CKernel , data_for_gpu : *mut GroupOfBinders , gpu_data_len : usize) -> i32 {
606
607 {
608 let kernel = unsafe {&mut *kernel};
611
612
613
614 if data_for_gpu.is_null(){
615 println!("ERROR : data_for_gpu arg of compute function is NULL");
616 return -1;
617 }
618
619 let (device , queue , compute_pipeline) = kernel.get_real_config();
620
621 let mut encoder =
627 device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
628
629 let mut staging_buffers : Vec<wgpu::Buffer> = Vec::new();
630 let mut sizes : Vec<wgpu::BufferAddress> = Vec::new();
631 let mut storage_buffers : Vec<wgpu::Buffer> = Vec::new();
632
633
634 let groups : &mut [GroupOfBinders] = unsafe { std::slice::from_raw_parts_mut(data_for_gpu , gpu_data_len) };
635
636 {
638 let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
639 label: None,
640 timestamp_writes: None,
641 });
642
643 for group in &mut *groups {
644 let bind_group_layout = compute_pipeline.get_bind_group_layout(group.group);
645 if group.datas.is_null() {
646 println!("ERROR : no data provided for datas field in data_for_gpu arg");
647 return -1;
648 }
649
650 let bindings : &mut [DataBinder] = unsafe{
651 std::slice::from_raw_parts_mut(group.datas , group.datas_len)
652 };
653
654 let mut tmp_staging_buffers : Vec<wgpu::Buffer> = Vec::new();
655 let mut tmp_sizes : Vec<wgpu::BufferAddress> = Vec::new();
656 let mut tmp_storage_buffers : Vec<wgpu::Buffer> = Vec::new();
657
658 let mut entries : Vec<wgpu::BindGroupEntry> = Vec::new();
659
660 for binder in &mut *bindings {
661 if binder.data.is_null() {
662 println!("ERROR : null data field in DataBinder found");
663 return -1;
664 }
665
666 let data : &[u8] = unsafe{
667 std::slice::from_raw_parts(*binder.data , binder.data_len)
668 };
669
670
671
672 let size = std::mem::size_of_val(data) as wgpu::BufferAddress;
673
674 let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
675 label: None ,
676 size : size ,
677 usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
678 mapped_at_creation: false,
679 });
680
681 let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
682 label: Some("Storage Buffer"),
683 contents: data ,
684 usage: wgpu::BufferUsages::STORAGE
685 | wgpu::BufferUsages::COPY_DST
686 | wgpu::BufferUsages::COPY_SRC,
687 });
688
689
690
691 tmp_sizes.push(size);
692 tmp_staging_buffers.push(staging_buffer);
693 tmp_storage_buffers.push(storage_buffer);
694 }
695
696
697
698 for (i, binder) in bindings.iter().enumerate() {
699 entries.push(wgpu::BindGroupEntry {
700 binding: binder.bind.clone(),
701 resource: tmp_storage_buffers[i].as_entire_binding(),
702 });
703 }
704
705 let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
706 label: None,
707 layout: &bind_group_layout,
708 entries: entries.as_slice() ,
709 });
710
711 cpass.set_pipeline(&compute_pipeline);
712 cpass.set_bind_group(group.group , &bind_group, &[]);
713
714 storage_buffers.append(&mut tmp_storage_buffers);
715 staging_buffers.append(&mut tmp_staging_buffers);
716 sizes.append(&mut tmp_sizes);
717 }
718
719 cpass.insert_debug_marker("debug_marker");
720 cpass.dispatch_workgroups(kernel.x, kernel.y, kernel.z);
721 }
722 for (index, storage_buffer) in storage_buffers.iter().enumerate() {
726 encoder.copy_buffer_to_buffer(&storage_buffer, 0, &staging_buffers[index], 0, sizes[index]);
727 }
728
729 queue.submit(Some(encoder.finish()));
730
731
732
733 let mut index : usize = 0;
734 for group in groups {
735 let bindings : &mut [DataBinder] = unsafe{
736 std::slice::from_raw_parts_mut(group.datas , group.datas_len)
737 };
738
739 for binder in bindings {
740
741 let data : Box<[u8]> = unsafe{
742 Box::from_raw(std::slice::from_raw_parts_mut(*binder.data , binder.data_len))
743 };
744
745
746 let buffer_slice = staging_buffers[index].slice(..);
747 let (sender, receiver) = flume::bounded(1);
748 buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
749
750 device.poll(wgpu::Maintain::wait()).panic_on_timeout();
751
752 if let Ok(Ok(())) = pollster::block_on(receiver.recv_async()) {
753 let mapped_data = buffer_slice.get_mapped_range();
754
755
756
757 unsafe {
758 let mapped_data_ptr = mapped_data.as_ptr();
759 let data: &[u8] = unsafe { std::slice::from_raw_parts(mapped_data_ptr , binder.data_len) };
760 let tmp_box : Box<[u8]> = data.into();
761 *binder.data = tmp_box.as_ptr() as *mut u8;
762 std::mem::forget(tmp_box);
763 }
764
765
766
767 drop(mapped_data);
768 staging_buffers[index].unmap();
769
770 } else {
771 panic!("failed to run compute on gpu!")
772 }
773
774 index += 1;
775 }
776 }
777
778
779
780 return 0;
781 }
782}
783
784
785#[no_mangle]
786pub extern "C" fn free_compute_cache(){
793 unsafe {
794 GPU_RES_KEEPER = None;
795 }
796}