//! Crate root: declares the `async_executor`, `cl_types` and `error` modules.
pub mod async_executor;
// NOTE(review): these lint suppressions carry no stated reason. `dead_code` is
// already a member of the `unused` lint group, so the second attribute looks
// redundant — confirm before removing either one.
#[allow(unused)]
#[allow(dead_code)]
pub mod cl_types;
pub mod error;
18
/// Returns the sum of `left` and `right`.
///
/// The addition uses the plain `+` operator, so overflow behaviour follows the
/// build's overflow-check setting (a panic when overflow checks are enabled).
pub fn add(left: u64, right: u64) -> u64 {
    left + right
}
22
23#[cfg(test)]
24mod tests {
25 use std::sync::Arc;
26 use std::time::Instant;
27 use std::{ffi::c_void, ptr::null_mut};
28
29 use crate::{
30 cl_types::{
31 cl_command_queue::{
32 ClCommandQueue,
33 command_queue_parameters::{CommandQueueProperties, Version20},
34 },
35 cl_context::ClContext,
36 cl_platform::ClPlatform,
37 cl_image::{image_desc::ClImageDesc, image_formats::ClImageFormats},
38 memory_flags::MemoryFlags
39 },
40 error::ClError,
41 };
42
/// Runs `$block`, prints its wall-clock duration as `[TIMER] <label>: <duration>`,
/// and evaluates to the block's value.
///
/// The block is expanded inline (not wrapped in a closure), so a `?` inside it
/// returns from the function that invoked the macro. `$label` is evaluated only
/// when the timer line is printed, i.e. after the block has finished.
/// `Instant` must be in scope at the call site.
macro_rules! time_it {
    ($label:expr, $block:block) => {{
        let (value, elapsed) = {
            let started_at = Instant::now();
            let produced = $block;
            (produced, started_at.elapsed())
        };
        println!("[TIMER] {}: {:?}", $label, elapsed);
        value
    }};
}
52
/// Lists every platform and, for each one, every device it reports, printing the
/// device's description, OpenCL version and compute-unit count.
///
/// Any enumeration error is propagated and fails the test.
#[test]
fn test_hardware_discovery() -> Result<(), ClError> {
    println!("\n=== HARDWARE DISCOVERY ===");
    let all_platforms = time_it!("Platform enumeration", { ClPlatform::get_all()? });

    for (index, current) in all_platforms.iter().enumerate() {
        println!("Platform [{}]: {}", index, current);

        let label = format!("Device discovery (Platform {})", index);
        let found = time_it!(label, { current.get_all_devices()? });

        found.into_iter().for_each(|dev| {
            println!(" - Device: {}", dev);
            println!(" Version: {}", dev.get_opencl_version());
            // A failed compute-unit query is reported as 0 rather than failing the test.
            let compute_units = dev.get_max_compute_units().unwrap_or(0);
            println!(" Max Compute Units: {}", compute_units);
        });
    }

    Ok(())
}
69
/// Builds one context over all devices of the default platform, then creates a
/// command queue with `Version20` properties for each device in turn.
///
/// Each queue is dropped at the end of its loop iteration.
#[test]
fn test_core_resource_lifecycle() -> Result<(), ClError> {
    println!("\n=== RESOURCE LIFECYCLE ===");
    let default_platform = ClPlatform::default()?;
    let devices = default_platform.get_all_devices()?;

    let context = time_it!("Context creation", { ClContext::new(&devices)? });

    for (index, device) in devices.iter().enumerate() {
        // NOTE(review): the meaning of the four flags is defined by
        // `set_cl_queue_properties`, which is not visible here — confirm there.
        let queue_props = CommandQueueProperties::<Version20>::new()
            .set_cl_queue_properties(true, true, false, false)
            .get_properties();

        let label = format!("Queue creation (Device {})", index);
        let _queue = time_it!(label, {
            ClCommandQueue::create_command_queue_with_properties(&context, device, &queue_props)?
        });
    }

    Ok(())
}
89
90 #[tokio::test]
91 async fn test_memory_operations_comprehensive() -> Result<(), ClError> {
92 println!("\n=== MEMORY OPERATIONS ===");
93 let platform = ClPlatform::default()?;
94 let devices = platform.get_all_devices()?;
95 let device = devices.first().unwrap();
96 let executor = crate::async_executor::AsyncExecutor::new_from_devices(&devices)?;
97
98 let size = 1024 * 1024; let mut host_data: Vec<f32> = vec![42.0; size];
101 time_it!("Buffer allocation (1MB) [High-level]", {
102 executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], size * 4, host_data.as_mut_ptr() as *mut c_void)?
103 });
104
105 if device.get_image_support().unwrap_or(false) {
107 let formats = ClImageFormats::rgba_unorm_int8();
108 let desc = ClImageDesc {
109 image_type: crate::cl_types::cl_image::image_type::ClImageType::Image2D,
110 image_width: Some(512),
111 image_height: Some(512),
112 ..Default::default()
113 };
114 let _image = time_it!("Image creation (512x512 RGBA) [High-level]", {
115 executor.create_image(&[MemoryFlags::ReadWrite], &formats, &desc, null_mut())?
116 });
117 }
118
119 if device.get_opencl_version() >= crate::cl_types::cl_device::opencl_version::OpenCLVersion::V2_0 {
121 let svm = time_it!("SVM allocation (1024 f32) [High-level]", {
122 executor.create_svm_buffer::<f32>(&[MemoryFlags::ReadWrite], 1024)?
123 });
124 if let Ok(svm_caps) = device.get_svm_capabilities() {
125 println!(" SVM Capabilites: {:?}", svm_caps);
126 }
127 drop(svm);
128 }
129
130 Ok(())
131 }
132
133 #[tokio::test]
134 async fn test_executor_full_pipeline() -> Result<(), ClError> {
135 println!("\n=== EXECUTOR PIPELINE ===");
136 let executor = time_it!("Executor initialization (Best Platform)", {
137 crate::async_executor::AsyncExecutor::new_best_platform_with_options(true)?
138 });
139
140 let path = "./tests/program1test/add.cl";
141 if !std::path::Path::new(path).exists() {
142 println!("Skipping pipeline test: kernel file not found at {}", path);
143 return Ok(());
144 }
145
146 let source = std::fs::read_to_string(path).unwrap();
147
148 let builded = time_it!("Program build (High-level)", {
149 executor.build_program(source, None)?
150 });
151
152 let kernel = time_it!("Kernel creation (High-level)", {
153 executor.create_kernel(&builded, "add")?
154 });
155
156 let size = 1024 * 1024;
157 let mut a: Vec<f32> = vec![1.0; size];
158 let mut b: Vec<f32> = vec![2.0; size];
159 let buffer_a = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], size * 4, a.as_mut_ptr() as *mut c_void)?;
160 let buffer_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], size * 4, b.as_mut_ptr() as *mut c_void)?;
161
162 let report = time_it!("Task execution (AsyncExecutor)", {
163 executor.create_task(kernel)
164 .arg_buffer(0, &buffer_a)
165 .arg_buffer(1, &buffer_b)
166 .global_work_dims(size, 1, 1)
167 .read_buffer(&buffer_a, &mut a)
168 .run()
169 .await?
170 });
171
172 println!("--- Profiling Results ---");
173 println!(" Kernel Time (GPU): {} ns", report.total_kernel_duration_ns());
174 println!(" Read Time (GPU): {} ns", report.total_read_duration_ns());
175
176 assert!((a[0] - 3.0).abs() < 1e-5);
177 Ok(())
178 }
179
180 #[tokio::test]
181 async fn test_concurrency_stress() -> Result<(), ClError> {
182 println!("\n=== CONCURRENCY STRESS ===");
183 let executor = Arc::new(crate::async_executor::AsyncExecutor::new_best_platform()?);
184 let path = "./tests/program1test/add.cl";
185 if !std::path::Path::new(path).exists() { return Ok(()); }
186
187 let source = std::fs::read_to_string(path).unwrap();
188 let builded = executor.build_program(source, None)?;
189
190 let mut tasks = Vec::new();
191 for i in 0..10 {
192 let kernel = executor.create_kernel(&builded, "add")?;
193 let mut data = vec![i as f32; 1024];
194 let buf = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
195 let buf_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
196
197 let executor_clone = executor.clone();
198 tasks.push(async move {
199 executor_clone.create_task(kernel)
200 .arg_buffer(0, &buf)
201 .arg_buffer(1, &buf_b)
202 .global_work_dims(1024, 1, 1)
203 .run()
204 .await
205 });
206 }
207
208 time_it!("10 Concurrent Tasks Submission", {
209 futures::future::join_all(tasks).await;
210 });
211
212 Ok(())
213 }
214
215 #[tokio::test]
216 async fn test_minimalist_api() -> Result<(), ClError> {
217 println!("\n=== MINIMALIST API EXAMPLE ===");
218
219 let executor = crate::async_executor::AsyncExecutor::new_best_platform()?;
221
222 let program = executor.build_program("kernel void add(global float* a, global float* b) { a[get_global_id(0)] += b[get_global_id(0)]; }".to_string(), None)?;
224 let kernel = executor.create_kernel(&program, "add")?;
225
226 let mut data: Vec<f32> = vec![10.0; 1024];
228 let other: Vec<f32> = vec![5.0; 1024];
229 let buf_a = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
230 let buf_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], 1024 * 4, other.as_ptr() as *mut c_void)?;
231
232 executor.create_task(kernel)
234 .arg_buffer(0, &buf_a)
235 .arg_buffer(1, &buf_b)
236 .global_work_dims(1024, 1, 1)
237 .read_buffer(&buf_a, &mut data)
238 .run()
239 .await?;
240
241 println!("Minimalist result: {}", data[0]);
242 assert_eq!(data[0], 15.0);
243 Ok(())
244 }
245
246 async fn test_write_buffer() -> Result<(), ClError> {
247 let executor = crate::async_executor::AsyncExecutor::new_best_platform()?;
248
249 let program = executor.build_program("kernel void add(global float* a, global float* b) { a[get_global_id(0)] += b[get_global_id(0)]; }".to_string(), None)?;
250 let kernel = executor.create_kernel(&program, "add")?;
251
252 let mut data: Vec<f32> = vec![10.0; 1024];
253 let other: Vec<f32> = vec![5.0; 1024];
254 let buf_a = executor.create_buffer(&[MemoryFlags::ReadWrite, MemoryFlags::CopyHostPtr], 1024 * 4, data.as_mut_ptr() as *mut c_void)?;
255 let buf_b = executor.create_buffer(&[MemoryFlags::ReadOnly, MemoryFlags::CopyHostPtr], 1024 * 4, other.as_ptr() as *mut c_void)?;
256
257 Ok(())
258 }
259}