Skip to main content

easy_async_opencl3/
lib.rs

1//! # easy-async-cl3
2//! 
3//! A high-level, async-first Rust wrapper for OpenCL with intelligent GPU management.
4//! 
5//! This library provides:
6//! - **Async/await support**: All GPU operations return futures
7//! - **Automatic resource management**: RAII-based cleanup
8//! - **Multi-GPU support**: Automatic work distribution
9//! - **Type-safe API**: Compile-time guarantees
10//! - **Profiling support**: Built-in performance measurement
11//! - **Modern OpenCL features**: Support for OpenCL 1.1 through 3.0
12
13pub mod async_executor;
14pub use async_executor::pipeline_task::{PipelineBuilder, PipelineReport};
15#[allow(unused)]
16#[allow(dead_code)]
17pub mod cl_types;
18pub mod error;
19
/// Adds two unsigned 64-bit integers and returns their sum.
///
/// Declared `const` so the helper can also be evaluated in constant
/// contexts; existing runtime callers are unaffected.
///
/// # Panics
///
/// Uses plain `+`, so an overflowing sum panics when overflow checks are
/// enabled (the default for debug builds) and wraps otherwise.
#[must_use]
pub const fn add(left: u64, right: u64) -> u64 {
    left + right
}
23
24#[cfg(test)]
25mod tests {
26    use std::sync::Arc;
27    use std::time::Instant;
28    use std::{ffi::c_void, ptr::null_mut};
29
30    use crate::{
31        cl_types::{
32            cl_command_queue::{
33                ClCommandQueue,
34                command_queue_parameters::{CommandQueueProperties, Version20},
35            }, 
36            cl_context::ClContext, 
37            cl_platform::ClPlatform, 
38            cl_image::{image_desc::ClImageDesc, image_formats::ClImageFormats},
39            memory_flags::MemoryFlags
40        },
41        error::ClError,
42    };
43
44    macro_rules! time_it {
45        ($label:expr, $block:block) => {{
46            let start = Instant::now();
47            let result = $block;
48            let duration = start.elapsed();
49            println!("[TIMER] {}: {:?}", $label, duration);
50            result
51        }};
52    }
53
54    #[test]
55    fn test_hardware_discovery() -> Result<(), ClError> {
56        println!("\n=== HARDWARE DISCOVERY ===");
57        let platforms = time_it!("Platform enumeration", { ClPlatform::get_all()? });
58        
59        for (i, platform) in platforms.iter().enumerate() {
60            println!("Platform [{}]: {}", i, platform);
61            let devices = time_it!(format!("Device discovery (Platform {})", i), { platform.get_all_devices()? });
62            for device in devices {
63                println!("  - Device: {}", device);
64                println!("    Version: {}", device.get_opencl_version());
65                println!("    Max Compute Units: {}", device.get_max_compute_units().unwrap_or(0));
66            }
67        }
68        Ok(())
69    }
70
71    #[test]
72    fn test_core_resource_lifecycle() -> Result<(), ClError> {
73        println!("\n=== RESOURCE LIFECYCLE ===");
74        let platform = ClPlatform::default()?;
75        let devices = platform.get_all_devices()?;
76        
77        let context = time_it!("Context creation", { ClContext::new(&devices)? });
78        
79        for (i, device) in devices.iter().enumerate() {
80            let props = CommandQueueProperties::<Version20>::new()
81                .set_cl_queue_properties(true, true, false, false)
82                .get_properties();
83            
84            let _queue = time_it!(format!("Queue creation (Device {})", i), {
85                ClCommandQueue::create_command_queue_with_properties(&context, device, &props)?
86            });
87        }
88        Ok(())
89    }
90
91    #[tokio::test]
92    async fn test_memory_operations_comprehensive() -> Result<(), ClError> {
93        println!("\n=== MEMORY OPERATIONS ===");
94        let platform = ClPlatform::default()?;
95        let devices = platform.get_all_devices()?;
96        let device = devices.first().unwrap();
97        let executor = crate::async_executor::AsyncExecutor::new_from_devices(&devices)?;
98        
99        // 1. Buffer Test
100        let size = 1024 * 1024; // 1MB
101        let mut host_data: Vec<f32> = vec![42.0; size];
102        time_it!("Buffer allocation (1MB) [High-level]", {
103            executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], size * 4, host_data.as_mut_ptr() as *mut c_void)?
104        });
105
106        // 2. Image Test (Conditional)
107        if device.get_image_support().unwrap_or(false) {
108            let formats = ClImageFormats::rgba_unorm_int8();
109            let desc = ClImageDesc {
110                image_type: crate::cl_types::cl_image::image_type::ClImageType::Image2D,
111                image_width: Some(512),
112                image_height: Some(512),
113                ..Default::default()
114            };
115            let _image = time_it!("Image creation (512x512 RGBA) [High-level]", {
116                executor.create_image(&[MemoryFlags::ReadWrite], &formats, &desc, null_mut())?
117            });
118        }
119
120        // 3. SVM Test (Conditional OpenCL 2.0+)
121        if device.get_opencl_version() >= crate::cl_types::cl_device::opencl_version::OpenCLVersion::V2_0 {
122            let svm = time_it!("SVM allocation (1024 f32) [High-level]", {
123                executor.create_svm_buffer::<f32>(&[MemoryFlags::ReadWrite], 1024)?
124            });
125            if let Ok(svm_caps) = device.get_svm_capabilities() {
126                println!("  SVM Capabilites: {:?}", svm_caps);
127            }
128            drop(svm);
129        }
130
131        Ok(())
132    }
133
134    #[tokio::test]
135    async fn test_executor_full_pipeline() -> Result<(), ClError> {
136        println!("\n=== EXECUTOR PIPELINE ===");
137        let executor = time_it!("Executor initialization (Best Platform)", {
138            crate::async_executor::AsyncExecutor::new_best_platform_with_options(true)?
139        });
140        
141        let path = "./tests/program1test/add.cl";
142        if !std::path::Path::new(path).exists() {
143            println!("Skipping pipeline test: kernel file not found at {}", path);
144            return Ok(());
145        }
146
147        let source = std::fs::read_to_string(path).unwrap();
148        
149        let builded = time_it!("Program build (High-level)", {
150            executor.build_program(source, None)?
151        });
152        
153        let kernel = time_it!("Kernel creation (High-level)", {
154            executor.create_kernel(&builded, "add")?
155        });
156
157        let size = 1024 * 1024;
158        let mut a: Vec<f32> = vec![1.0; size];
159        let mut b: Vec<f32> = vec![2.0; size];
160        let buffer_a = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], size * 4, a.as_mut_ptr() as *mut c_void)?;
161        let buffer_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], size * 4, b.as_mut_ptr() as *mut c_void)?;
162
163        let report = time_it!("Task execution (AsyncExecutor)", {
164            executor.create_task(&kernel)
165                .arg_buffer(0, &buffer_a)
166                .arg_buffer(1, &buffer_b)
167                .global_work_dims(size, 1, 1)
168                .read_buffer(&buffer_a, &mut a)
169                .run()
170                .await?
171        });
172
173        println!("--- Profiling Results ---");
174        println!("  Kernel Time (GPU): {} ns", report.total_kernel_duration_ns());
175        println!("  Read Time (GPU):   {} ns", report.total_read_duration_ns());
176        
177        assert!((a[0] - 3.0).abs() < 1e-5);
178        Ok(())
179    }
180
181    #[tokio::test]
182    async fn test_concurrency_stress() -> Result<(), ClError> {
183        println!("\n=== CONCURRENCY STRESS ===");
184        let executor = Arc::new(crate::async_executor::AsyncExecutor::new_best_platform()?);
185        let path = "./tests/program1test/add.cl";
186        if !std::path::Path::new(path).exists() { return Ok(()); }
187        
188        let source = std::fs::read_to_string(path).unwrap();
189        let builded = executor.build_program(source, None)?;
190        
191        let mut tasks = Vec::new();
192        for i in 0..10 {
193            let kernel = executor.create_kernel(&builded, "add")?;
194            let mut data = vec![i as f32; 1024];
195            let buf = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
196            let buf_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
197            
198            let executor_clone = executor.clone();
199            tasks.push(async move {
200                executor_clone.create_task(&kernel)
201                    .arg_buffer(0, &buf)
202                    .arg_buffer(1, &buf_b)
203                    .global_work_dims(1024, 1, 1)
204                    .run()
205                    .await
206            });
207        }
208        
209        time_it!("10 Concurrent Tasks Submission", {
210            futures::future::join_all(tasks).await;
211        });
212        
213        Ok(())
214    }
215
216    #[tokio::test]
217    async fn test_minimalist_api() -> Result<(), ClError> {
218        println!("\n=== MINIMALIST API EXAMPLE ===");
219        
220        // 1. Initialize
221        let executor = crate::async_executor::AsyncExecutor::new_best_platform()?;
222        
223        // 2. Build & Create Kernel
224        let program = executor.build_program("kernel void add(global float* a, global float* b) { a[get_global_id(0)] += b[get_global_id(0)]; }".to_string(), None)?;
225        let kernel = executor.create_kernel(&program, "add")?;
226        
227        // 3. Simple buffers
228        let mut data: Vec<f32> = vec![10.0; 1024];
229        let other: Vec<f32> = vec![5.0; 1024];
230        let buf_a = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
231        let buf_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], 1024 * 4, other.as_ptr() as *mut c_void)?;
232        
233        // 4. Run declaratively
234        executor.create_task(&kernel)
235            .arg_buffer(0, &buf_a)
236            .arg_buffer(1, &buf_b)
237            .global_work_dims(1024, 1, 1)
238            .read_buffer(&buf_a, &mut data)
239            .run()
240            .await?;
241            
242        println!("Minimalist result: {}", data[0]);
243        assert_eq!(data[0], 15.0);
244        Ok(())
245    }
246
247    async fn test_write_buffer() -> Result<(), ClError> {
248        let executor = crate::async_executor::AsyncExecutor::new_best_platform()?;
249        
250        let program = executor.build_program("kernel void add(global float* a, global float* b) { a[get_global_id(0)] += b[get_global_id(0)]; }".to_string(), None)?;
251        let kernel = executor.create_kernel(&program, "add")?;
252    
253        let mut data: Vec<f32> = vec![10.0; 1024];
254        let other: Vec<f32> = vec![5.0; 1024];
255        let buf_a = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
256        let buf_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], 1024 * 4, other.as_ptr() as *mut c_void)?;
257        
258        Ok(())
259    }
260}