easy_async_opencl3/
lib.rs

1//! # easy-async-cl3
2//! 
3//! A high-level, async-first Rust wrapper for OpenCL with intelligent GPU management.
4//! 
5//! This library provides:
6//! - **Async/await support**: All GPU operations return futures
7//! - **Automatic resource management**: RAII-based cleanup
8//! - **Multi-GPU support**: Automatic work distribution
9//! - **Type-safe API**: Compile-time guarantees
10//! - **Profiling support**: Built-in performance measurement
11//! - **Modern OpenCL features**: Support for OpenCL 1.1 through 3.0
12
13pub mod async_executor;
14#[allow(unused)]
15#[allow(dead_code)]
16pub mod cl_types;
17pub mod error;
18
/// Returns the sum of `left` and `right`.
///
/// Overflow is handled by Rust's ordinary integer-arithmetic rules for the
/// active build profile.
pub fn add(left: u64, right: u64) -> u64 {
    [left, right].iter().sum()
}
22
23#[cfg(test)]
24mod tests {
25    use std::sync::Arc;
26    use std::time::Instant;
27    use std::{ffi::c_void, ptr::null_mut};
28
29    use crate::{
30        cl_types::{
31            cl_command_queue::{
32                ClCommandQueue,
33                command_queue_parameters::{CommandQueueProperties, Version20},
34            }, 
35            cl_context::ClContext, 
36            cl_platform::ClPlatform, 
37            cl_image::{image_desc::ClImageDesc, image_formats::ClImageFormats},
38            memory_flags::MemoryFlags
39        },
40        error::ClError,
41    };
42
43    macro_rules! time_it {
44        ($label:expr, $block:block) => {{
45            let start = Instant::now();
46            let result = $block;
47            let duration = start.elapsed();
48            println!("[TIMER] {}: {:?}", $label, duration);
49            result
50        }};
51    }
52
53    #[test]
54    fn test_hardware_discovery() -> Result<(), ClError> {
55        println!("\n=== HARDWARE DISCOVERY ===");
56        let platforms = time_it!("Platform enumeration", { ClPlatform::get_all()? });
57        
58        for (i, platform) in platforms.iter().enumerate() {
59            println!("Platform [{}]: {}", i, platform);
60            let devices = time_it!(format!("Device discovery (Platform {})", i), { platform.get_all_devices()? });
61            for device in devices {
62                println!("  - Device: {}", device);
63                println!("    Version: {}", device.get_opencl_version());
64                println!("    Max Compute Units: {}", device.get_max_compute_units().unwrap_or(0));
65            }
66        }
67        Ok(())
68    }
69
70    #[test]
71    fn test_core_resource_lifecycle() -> Result<(), ClError> {
72        println!("\n=== RESOURCE LIFECYCLE ===");
73        let platform = ClPlatform::default()?;
74        let devices = platform.get_all_devices()?;
75        
76        let context = time_it!("Context creation", { ClContext::new(&devices)? });
77        
78        for (i, device) in devices.iter().enumerate() {
79            let props = CommandQueueProperties::<Version20>::new()
80                .set_cl_queue_properties(true, true, false, false)
81                .get_properties();
82            
83            let _queue = time_it!(format!("Queue creation (Device {})", i), {
84                ClCommandQueue::create_command_queue_with_properties(&context, device, &props)?
85            });
86        }
87        Ok(())
88    }
89
90    #[tokio::test]
91    async fn test_memory_operations_comprehensive() -> Result<(), ClError> {
92        println!("\n=== MEMORY OPERATIONS ===");
93        let platform = ClPlatform::default()?;
94        let devices = platform.get_all_devices()?;
95        let device = devices.first().unwrap();
96        let executor = crate::async_executor::AsyncExecutor::new_from_devices(&devices)?;
97        
98        // 1. Buffer Test
99        let size = 1024 * 1024; // 1MB
100        let mut host_data: Vec<f32> = vec![42.0; size];
101        time_it!("Buffer allocation (1MB) [High-level]", {
102            executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], size * 4, host_data.as_mut_ptr() as *mut c_void)?
103        });
104
105        // 2. Image Test (Conditional)
106        if device.get_image_support().unwrap_or(false) {
107            let formats = ClImageFormats::rgba_unorm_int8();
108            let desc = ClImageDesc {
109                image_type: crate::cl_types::cl_image::image_type::ClImageType::Image2D,
110                image_width: Some(512),
111                image_height: Some(512),
112                ..Default::default()
113            };
114            let _image = time_it!("Image creation (512x512 RGBA) [High-level]", {
115                executor.create_image(&[MemoryFlags::ReadWrite], &formats, &desc, null_mut())?
116            });
117        }
118
119        // 3. SVM Test (Conditional OpenCL 2.0+)
120        if device.get_opencl_version() >= crate::cl_types::cl_device::opencl_version::OpenCLVersion::V2_0 {
121            let svm = time_it!("SVM allocation (1024 f32) [High-level]", {
122                executor.create_svm_buffer::<f32>(&[MemoryFlags::ReadWrite], 1024)?
123            });
124            if let Ok(svm_caps) = device.get_svm_capabilities() {
125                println!("  SVM Capabilites: {:?}", svm_caps);
126            }
127            drop(svm);
128        }
129
130        Ok(())
131    }
132
133    #[tokio::test]
134    async fn test_executor_full_pipeline() -> Result<(), ClError> {
135        println!("\n=== EXECUTOR PIPELINE ===");
136        let executor = time_it!("Executor initialization (Best Platform)", {
137            crate::async_executor::AsyncExecutor::new_best_platform_with_options(true)?
138        });
139        
140        let path = "./tests/program1test/add.cl";
141        if !std::path::Path::new(path).exists() {
142            println!("Skipping pipeline test: kernel file not found at {}", path);
143            return Ok(());
144        }
145
146        let source = std::fs::read_to_string(path).unwrap();
147        
148        let builded = time_it!("Program build (High-level)", {
149            executor.build_program(source, None)?
150        });
151        
152        let kernel = time_it!("Kernel creation (High-level)", {
153            executor.create_kernel(&builded, "add")?
154        });
155
156        let size = 1024 * 1024;
157        let mut a: Vec<f32> = vec![1.0; size];
158        let mut b: Vec<f32> = vec![2.0; size];
159        let buffer_a = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], size * 4, a.as_mut_ptr() as *mut c_void)?;
160        let buffer_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], size * 4, b.as_mut_ptr() as *mut c_void)?;
161
162        let report = time_it!("Task execution (AsyncExecutor)", {
163            executor.create_task(kernel)
164                .arg_buffer(0, &buffer_a)
165                .arg_buffer(1, &buffer_b)
166                .global_work_dims(size, 1, 1)
167                .read_buffer(&buffer_a, &mut a)
168                .run()
169                .await?
170        });
171
172        println!("--- Profiling Results ---");
173        println!("  Kernel Time (GPU): {} ns", report.total_kernel_duration_ns());
174        println!("  Read Time (GPU):   {} ns", report.total_read_duration_ns());
175        
176        assert!((a[0] - 3.0).abs() < 1e-5);
177        Ok(())
178    }
179
180    #[tokio::test]
181    async fn test_concurrency_stress() -> Result<(), ClError> {
182        println!("\n=== CONCURRENCY STRESS ===");
183        let executor = Arc::new(crate::async_executor::AsyncExecutor::new_best_platform()?);
184        let path = "./tests/program1test/add.cl";
185        if !std::path::Path::new(path).exists() { return Ok(()); }
186        
187        let source = std::fs::read_to_string(path).unwrap();
188        let builded = executor.build_program(source, None)?;
189        
190        let mut tasks = Vec::new();
191        for i in 0..10 {
192            let kernel = executor.create_kernel(&builded, "add")?;
193            let mut data = vec![i as f32; 1024];
194            let buf = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
195            let buf_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
196            
197            let executor_clone = executor.clone();
198            tasks.push(async move {
199                executor_clone.create_task(kernel)
200                    .arg_buffer(0, &buf)
201                    .arg_buffer(1, &buf_b)
202                    .global_work_dims(1024, 1, 1)
203                    .run()
204                    .await
205            });
206        }
207        
208        time_it!("10 Concurrent Tasks Submission", {
209            futures::future::join_all(tasks).await;
210        });
211        
212        Ok(())
213    }
214
215    #[tokio::test]
216    async fn test_minimalist_api() -> Result<(), ClError> {
217        println!("\n=== MINIMALIST API EXAMPLE ===");
218        
219        // 1. Initialize
220        let executor = crate::async_executor::AsyncExecutor::new_best_platform()?;
221        
222        // 2. Build & Create Kernel
223        let program = executor.build_program("kernel void add(global float* a, global float* b) { a[get_global_id(0)] += b[get_global_id(0)]; }".to_string(), None)?;
224        let kernel = executor.create_kernel(&program, "add")?;
225        
226        // 3. Simple buffers
227        let mut data: Vec<f32> = vec![10.0; 1024];
228        let other: Vec<f32> = vec![5.0; 1024];
229        let buf_a = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
230        let buf_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], 1024 * 4, other.as_ptr() as *mut c_void)?;
231        
232        // 4. Run declaratively
233        executor.create_task(kernel)
234            .arg_buffer(0, &buf_a)
235            .arg_buffer(1, &buf_b)
236            .global_work_dims(1024, 1, 1)
237            .read_buffer(&buf_a, &mut data)
238            .run()
239            .await?;
240            
241        println!("Minimalist result: {}", data[0]);
242        assert_eq!(data[0], 15.0);
243        Ok(())
244    }
245
246    async fn test_write_buffer() -> Result<(), ClError> {
247        let executor = crate::async_executor::AsyncExecutor::new_best_platform()?;
248        
249        let program = executor.build_program("kernel void add(global float* a, global float* b) { a[get_global_id(0)] += b[get_global_id(0)]; }".to_string(), None)?;
250        let kernel = executor.create_kernel(&program, "add")?;
251    
252        let mut data: Vec<f32> = vec![10.0; 1024];
253        let other: Vec<f32> = vec![5.0; 1024];
254        let buf_a = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
255        let buf_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], 1024 * 4, other.as_ptr() as *mut c_void)?;
256        
257        Ok(())
258    }
259}