1pub mod async_executor;
14pub use async_executor::pipeline_task::{PipelineBuilder, PipelineReport};
15#[allow(unused)]
16#[allow(dead_code)]
17pub mod cl_types;
18pub mod error;
19
/// Returns the sum of `left` and `right`.
///
/// Overflow is handled the same way as the plain `+` operator for the current build profile.
pub fn add(left: u64, right: u64) -> u64 {
    [left, right].iter().sum()
}
23
24#[cfg(test)]
25mod tests {
26 use std::sync::Arc;
27 use std::time::Instant;
28 use std::{ffi::c_void, ptr::null_mut};
29
30 use crate::{
31 cl_types::{
32 cl_command_queue::{
33 ClCommandQueue,
34 command_queue_parameters::{CommandQueueProperties, Version20},
35 },
36 cl_context::ClContext,
37 cl_platform::ClPlatform,
38 cl_image::{image_desc::ClImageDesc, image_formats::ClImageFormats},
39 memory_flags::MemoryFlags
40 },
41 error::ClError,
42 };
43
/// Evaluates `$block`, prints `[TIMER] <label>: <elapsed>` to stdout, and yields the block's value.
///
/// The label expression is evaluated only after the elapsed time has been captured, so building
/// the label is not part of the measurement. If the block exits early (for example through `?`),
/// nothing is printed.
macro_rules! time_it {
    ($label:expr, $block:block) => {{
        let (outcome, elapsed) = {
            let timer = Instant::now();
            let value = $block;
            (value, timer.elapsed())
        };
        println!("[TIMER] {}: {:?}", $label, elapsed);
        outcome
    }};
}
53
/// Lists every platform returned by `ClPlatform::get_all` and, for each one, prints its devices
/// with their reported OpenCL version and compute-unit count. Both discovery calls are timed.
#[test]
fn test_hardware_discovery() -> Result<(), ClError> {
    println!("\n=== HARDWARE DISCOVERY ===");
    let all_platforms = time_it!("Platform enumeration", { ClPlatform::get_all()? });

    for (index, current) in all_platforms.iter().enumerate() {
        println!("Platform [{}]: {}", index, current);

        let timer_label = format!("Device discovery (Platform {})", index);
        let found = time_it!(timer_label, { current.get_all_devices()? });

        for dev in found {
            // A device that cannot report its compute units is shown as 0 instead of failing.
            let compute_units = dev.get_max_compute_units().unwrap_or(0);
            println!(" - Device: {}", dev);
            println!(" Version: {}", dev.get_opencl_version());
            println!(" Max Compute Units: {}", compute_units);
        }
    }

    Ok(())
}
70
/// Creates one context spanning all devices of the default platform, then creates (and
/// immediately drops) a command queue per device, timing each creation step.
#[test]
fn test_core_resource_lifecycle() -> Result<(), ClError> {
    println!("\n=== RESOURCE LIFECYCLE ===");

    let default_platform = ClPlatform::default()?;
    let all_devices = default_platform.get_all_devices()?;
    let shared_context = time_it!("Context creation", { ClContext::new(&all_devices)? });

    for (index, target) in all_devices.iter().enumerate() {
        // Queue properties are rebuilt for every device, exactly once per iteration.
        let queue_props = CommandQueueProperties::<Version20>::new()
            .set_cl_queue_properties(true, true, false, false)
            .get_properties();

        let timer_label = format!("Queue creation (Device {})", index);
        // The queue is only held until the end of this iteration.
        let _created = time_it!(timer_label, {
            ClCommandQueue::create_command_queue_with_properties(
                &shared_context,
                target,
                &queue_props,
            )?
        });
    }

    Ok(())
}
90
91 #[tokio::test]
92 async fn test_memory_operations_comprehensive() -> Result<(), ClError> {
93 println!("\n=== MEMORY OPERATIONS ===");
94 let platform = ClPlatform::default()?;
95 let devices = platform.get_all_devices()?;
96 let device = devices.first().unwrap();
97 let executor = crate::async_executor::AsyncExecutor::new_from_devices(&devices)?;
98
99 let size = 1024 * 1024; let mut host_data: Vec<f32> = vec![42.0; size];
102 time_it!("Buffer allocation (1MB) [High-level]", {
103 executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], size * 4, host_data.as_mut_ptr() as *mut c_void)?
104 });
105
106 if device.get_image_support().unwrap_or(false) {
108 let formats = ClImageFormats::rgba_unorm_int8();
109 let desc = ClImageDesc {
110 image_type: crate::cl_types::cl_image::image_type::ClImageType::Image2D,
111 image_width: Some(512),
112 image_height: Some(512),
113 ..Default::default()
114 };
115 let _image = time_it!("Image creation (512x512 RGBA) [High-level]", {
116 executor.create_image(&[MemoryFlags::ReadWrite], &formats, &desc, null_mut())?
117 });
118 }
119
120 if device.get_opencl_version() >= crate::cl_types::cl_device::opencl_version::OpenCLVersion::V2_0 {
122 let svm = time_it!("SVM allocation (1024 f32) [High-level]", {
123 executor.create_svm_buffer::<f32>(&[MemoryFlags::ReadWrite], 1024)?
124 });
125 if let Ok(svm_caps) = device.get_svm_capabilities() {
126 println!(" SVM Capabilites: {:?}", svm_caps);
127 }
128 drop(svm);
129 }
130
131 Ok(())
132 }
133
134 #[tokio::test]
135 async fn test_executor_full_pipeline() -> Result<(), ClError> {
136 println!("\n=== EXECUTOR PIPELINE ===");
137 let executor = time_it!("Executor initialization (Best Platform)", {
138 crate::async_executor::AsyncExecutor::new_best_platform_with_options(true)?
139 });
140
141 let path = "./tests/program1test/add.cl";
142 if !std::path::Path::new(path).exists() {
143 println!("Skipping pipeline test: kernel file not found at {}", path);
144 return Ok(());
145 }
146
147 let source = std::fs::read_to_string(path).unwrap();
148
149 let builded = time_it!("Program build (High-level)", {
150 executor.build_program(source, None)?
151 });
152
153 let kernel = time_it!("Kernel creation (High-level)", {
154 executor.create_kernel(&builded, "add")?
155 });
156
157 let size = 1024 * 1024;
158 let mut a: Vec<f32> = vec![1.0; size];
159 let mut b: Vec<f32> = vec![2.0; size];
160 let buffer_a = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], size * 4, a.as_mut_ptr() as *mut c_void)?;
161 let buffer_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], size * 4, b.as_mut_ptr() as *mut c_void)?;
162
163 let report = time_it!("Task execution (AsyncExecutor)", {
164 executor.create_task(&kernel)
165 .arg_buffer(0, &buffer_a)
166 .arg_buffer(1, &buffer_b)
167 .global_work_dims(size, 1, 1)
168 .read_buffer(&buffer_a, &mut a)
169 .run()
170 .await?
171 });
172
173 println!("--- Profiling Results ---");
174 println!(" Kernel Time (GPU): {} ns", report.total_kernel_duration_ns());
175 println!(" Read Time (GPU): {} ns", report.total_read_duration_ns());
176
177 assert!((a[0] - 3.0).abs() < 1e-5);
178 Ok(())
179 }
180
181 #[tokio::test]
182 async fn test_concurrency_stress() -> Result<(), ClError> {
183 println!("\n=== CONCURRENCY STRESS ===");
184 let executor = Arc::new(crate::async_executor::AsyncExecutor::new_best_platform()?);
185 let path = "./tests/program1test/add.cl";
186 if !std::path::Path::new(path).exists() { return Ok(()); }
187
188 let source = std::fs::read_to_string(path).unwrap();
189 let builded = executor.build_program(source, None)?;
190
191 let mut tasks = Vec::new();
192 for i in 0..10 {
193 let kernel = executor.create_kernel(&builded, "add")?;
194 let mut data = vec![i as f32; 1024];
195 let buf = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
196 let buf_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
197
198 let executor_clone = executor.clone();
199 tasks.push(async move {
200 executor_clone.create_task(&kernel)
201 .arg_buffer(0, &buf)
202 .arg_buffer(1, &buf_b)
203 .global_work_dims(1024, 1, 1)
204 .run()
205 .await
206 });
207 }
208
209 time_it!("10 Concurrent Tasks Submission", {
210 futures::future::join_all(tasks).await;
211 });
212
213 Ok(())
214 }
215
216 #[tokio::test]
217 async fn test_minimalist_api() -> Result<(), ClError> {
218 println!("\n=== MINIMALIST API EXAMPLE ===");
219
220 let executor = crate::async_executor::AsyncExecutor::new_best_platform()?;
222
223 let program = executor.build_program("kernel void add(global float* a, global float* b) { a[get_global_id(0)] += b[get_global_id(0)]; }".to_string(), None)?;
225 let kernel = executor.create_kernel(&program, "add")?;
226
227 let mut data: Vec<f32> = vec![10.0; 1024];
229 let other: Vec<f32> = vec![5.0; 1024];
230 let buf_a = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
231 let buf_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], 1024 * 4, other.as_ptr() as *mut c_void)?;
232
233 executor.create_task(&kernel)
235 .arg_buffer(0, &buf_a)
236 .arg_buffer(1, &buf_b)
237 .global_work_dims(1024, 1, 1)
238 .read_buffer(&buf_a, &mut data)
239 .run()
240 .await?;
241
242 println!("Minimalist result: {}", data[0]);
243 assert_eq!(data[0], 15.0);
244 Ok(())
245 }
246
247 async fn test_write_buffer() -> Result<(), ClError> {
248 let executor = crate::async_executor::AsyncExecutor::new_best_platform()?;
249
250 let program = executor.build_program("kernel void add(global float* a, global float* b) { a[get_global_id(0)] += b[get_global_id(0)]; }".to_string(), None)?;
251 let kernel = executor.create_kernel(&program, "add")?;
252
253 let mut data: Vec<f32> = vec![10.0; 1024];
254 let other: Vec<f32> = vec![5.0; 1024];
255 let buf_a = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
256 let buf_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], 1024 * 4, other.as_ptr() as *mut c_void)?;
257
258 Ok(())
259 }
260}