use scirs2_datasets::{
    get_optimal_gpu_config, is_cuda_available, is_opencl_available, list_gpu_devices,
    make_blobs_auto_gpu, make_classification, make_classification_auto_gpu,
    make_regression_auto_gpu, GpuBackend, GpuBenchmark, GpuConfig, GpuContext, GpuMemoryConfig,
};
use std::collections::HashMap;
use std::time::Instant;

#[allow(dead_code)]
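/// Entry point: runs each GPU demonstration in sequence, printing results to
/// stdout and stopping at the first error.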
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("🚀 GPU Acceleration Demonstration");
    println!("=================================\n");

    demonstrate_gpu_detection();

    demonstrate_device_listing()?;

    demonstrate_backend_comparison()?;

    demonstrate_performance_benchmarks()?;

    demonstrate_memory_management()?;

    demonstrate_real_world_scenarios()?;

    println!("\n🎉 GPU acceleration demonstration completed!");
    Ok(())
}

#[allow(dead_code)]
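/// Reports whether the CUDA and OpenCL runtimes are present, then prints the
/// configuration that `get_optimal_gpu_config()` recommends for this machine.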
fn demonstrate_gpu_detection() {
    println!("🔍 GPU DETECTION AND AVAILABILITY");
    println!("{}", "-".repeat(40));

    println!("CUDA Support:");
    if is_cuda_available() {
        println!("  ✅ CUDA is available");
        println!("  🎯 NVIDIA GPU acceleration supported");
    } else {
        println!("  ❌ CUDA not available");
        println!("  💡 Install CUDA toolkit for NVIDIA GPU support");
    }

    println!("\nOpenCL Support:");
    if is_opencl_available() {
        println!("  ✅ OpenCL is available");
        println!("  🎯 Multi-vendor GPU acceleration supported");
    } else {
        println!("  ❌ OpenCL not available");
        println!("  💡 Install OpenCL runtime for GPU support");
    }

    let optimal_config = get_optimal_gpu_config();
    println!("\nOptimal Configuration:");
    match optimal_config.backend {
        GpuBackend::Cuda { device_id } => {
            println!("  🚀 CUDA backend (device {device_id})");
        }
        GpuBackend::OpenCl {
            platform_id,
            device_id,
        } => {
            println!("  🚀 OpenCL backend (platform {platform_id}, device {device_id})");
        }
        GpuBackend::Cpu => {
            println!("  💻 CPU fallback (no GPU available)");
        }
    }
    println!(
        "  🧵 Threads per block: {}",
        optimal_config.threads_per_block
    );
    println!(
        "  🔢 Double precision: {}",
        optimal_config.enable_double_precision
    );

    println!();
}

#[allow(dead_code)]
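/// Enumerates the GPU devices reported by `list_gpu_devices()` and prints
/// their memory, compute-unit, and precision characteristics.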
fn demonstrate_device_listing() -> Result<(), Box<dyn std::error::Error>> {
    println!("📋 AVAILABLE GPU DEVICES");
    println!("{}", "-".repeat(40));

    let devices = list_gpu_devices()?;

    if devices.is_empty() {
        println!("No GPU devices found. Using CPU fallback.");
    } else {
        println!("Found {} device(s):", devices.len());

        for (i, device) in devices.iter().enumerate() {
            println!("\nDevice {i}:");
            println!("  Name: {}", device.name);
            println!("  Total Memory: {} MB", device.total_memory_mb);
            println!("  Available Memory: {} MB", device.available_memory_mb);
            println!("  Compute Units: {}", device.compute_units);
            println!("  Max Work Group: {}", device.max_work_group_size);
            println!("  Compute Capability: {}", device.compute_capability);
            println!(
                "  Double Precision: {}",
                if device.supports_double_precision {
                    "✅"
                } else {
                    "❌"
                }
            );

            let utilization = (device.total_memory_mb - device.available_memory_mb) as f64
                / device.total_memory_mb as f64
                * 100.0;
            println!("  Memory Utilization: {utilization:.1}%");
        }
    }

    println!();
    Ok(())
}

#[allow(dead_code)]
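/// Generates the same classification dataset on the CPU, CUDA, and OpenCL
/// backends (skipping any that are unavailable) and reports the speedup of
/// each GPU backend over the CPU baseline.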
fn demonstrate_backend_comparison() -> Result<(), Box<dyn std::error::Error>> {
    println!("⚡ GPU BACKEND COMPARISON");
    println!("{}", "-".repeat(40));

    let testsize = 50_000;
    let features = 20;

    println!("Comparing backends for {testsize} samples with {features} features:");

    let backends = vec![
        ("CPU Fallback", GpuBackend::Cpu),
        ("CUDA", GpuBackend::Cuda { device_id: 0 }),
        (
            "OpenCL",
            GpuBackend::OpenCl {
                platform_id: 0,
                device_id: 0,
            },
        ),
    ];

    let mut results: HashMap<String, std::time::Duration> = HashMap::new();

    for (name, backend) in backends {
        println!("\nTesting {name}:");

        let config = GpuConfig {
            backend: backend.clone(),
            threads_per_block: 256,
            enable_double_precision: true,
            ..Default::default()
        };

        match GpuContext::new(config) {
            Ok(context) => {
                if context.is_available() {
                    let start = Instant::now();
                    let dataset =
                        context.make_classification_gpu(testsize, features, 5, 2, 15, Some(42))?;
                    let duration = start.elapsed();

                    results.insert(name.to_string(), duration);

                    println!(
                        "  ✅ Classification: {} samples in {:.2}ms",
                        dataset.n_samples(),
                        duration.as_millis()
                    );
                    println!(
                        "  📈 Throughput: {:.1} samples/s",
                        dataset.n_samples() as f64 / duration.as_secs_f64()
                    );
                } else {
                    println!("  ❌ Backend not available");
                }
            }
            Err(e) => {
                println!("  ❌ Error: {e}");
            }
        }
    }

    if let Some(cpu_time) = results.get("CPU Fallback") {
        println!("\nSpeedup Analysis:");
        for (backend, gpu_time) in &results {
            if backend != "CPU Fallback" {
                let speedup = cpu_time.as_secs_f64() / gpu_time.as_secs_f64();
                println!("  {backend}: {speedup:.1}x faster than CPU");
            }
        }
    }

    println!();
    Ok(())
}

#[allow(dead_code)]
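/// Runs the built-in `GpuBenchmark` suites for data generation and matrix
/// operations, then times CPU against GPU generation directly.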
fn demonstrate_performance_benchmarks() -> Result<(), Box<dyn std::error::Error>> {
    println!("📊 PERFORMANCE BENCHMARKS");
    println!("{}", "-".repeat(40));

    let config = get_optimal_gpu_config();
    let benchmark = GpuBenchmark::new(config)?;

    println!("Running data generation benchmarks...");
    let data_results = benchmark.benchmark_data_generation()?;
    data_results.print_results();

    println!("\nRunning matrix operation benchmarks...");
    let matrix_results = benchmark.benchmark_matrix_operations()?;
    matrix_results.print_results();

    println!("\nCPU vs GPU Comparison:");
    demonstrate_cpu_gpu_comparison()?;

    println!();
    Ok(())
}

#[allow(dead_code)]
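/// Times `make_classification` against `make_classification_auto_gpu` for
/// several dataset sizes. The `auto_gpu` variant presumably falls back to the
/// CPU when no GPU is present, so a speedup near 1.0x is normal in that case.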
fn demonstrate_cpu_gpu_comparison() -> Result<(), Box<dyn std::error::Error>> {
    let dataset_sizes = vec![10_000, 50_000, 100_000];

    println!(
        "{:<12} {:<15} {:<15} {:<10}",
        "Size", "CPU Time", "GPU Time", "Speedup"
    );
    println!("{}", "-".repeat(55));

    for &size in &dataset_sizes {
        let cpu_start = Instant::now();
        let _cpudataset = make_classification(size, 20, 5, 2, 15, Some(42))?;
        let cpu_time = cpu_start.elapsed();

        let gpu_start = Instant::now();
        let _gpudataset = make_classification_auto_gpu(size, 20, 5, 2, 15, Some(42))?;
        let gpu_time = gpu_start.elapsed();

        let speedup = cpu_time.as_secs_f64() / gpu_time.as_secs_f64();

        println!(
            "{:<12} {:<15} {:<15} {:<10.1}x",
            size,
            format!("{:.1}ms", cpu_time.as_millis()),
            format!("{:.1}ms", gpu_time.as_millis()),
            speedup
        );
    }

    Ok(())
}

#[allow(dead_code)]
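/// Builds a `GpuContext` with an explicit `GpuMemoryConfig` (capped budget,
/// pre-allocated pool, coalescing) and generates regression datasets of
/// increasing size while estimating their memory footprint.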
fn demonstrate_memory_management() -> Result<(), Box<dyn std::error::Error>> {
    println!("💾 GPU MEMORY MANAGEMENT");
    println!("{}", "-".repeat(40));

    let memory_config = GpuMemoryConfig {
        max_memory_mb: Some(512),
        pool_size_mb: 256,
        enable_coalescing: true,
        use_unified_memory: false,
    };

    let gpu_config = GpuConfig {
        backend: get_optimal_gpu_config().backend,
        memory: memory_config,
        threads_per_block: 256,
        ..Default::default()
    };

    println!("Memory Configuration:");
    println!(
        "  Max Memory: {} MB",
        gpu_config.memory.max_memory_mb.unwrap_or(0)
    );
    println!("  Pool Size: {} MB", gpu_config.memory.pool_size_mb);
    println!("  Coalescing: {}", gpu_config.memory.enable_coalescing);
    println!("  Unified Memory: {}", gpu_config.memory.use_unified_memory);

    let context = GpuContext::new(gpu_config)?;
    let device_info = context.device_info();

    println!("\nDevice Memory Info:");
    println!("  Total: {} MB", device_info.total_memory_mb);
    println!("  Available: {} MB", device_info.available_memory_mb);
    println!(
        "  Utilization: {:.1}%",
        (device_info.total_memory_mb - device_info.available_memory_mb) as f64
            / device_info.total_memory_mb as f64
            * 100.0
    );

    println!("\nTesting memory-efficient dataset generation...");

    let sizes = vec![10_000, 25_000, 50_000];
    for &size in &sizes {
        let start = Instant::now();

        match context.make_regression_gpu(size, 50, 30, 0.1, Some(42)) {
            Ok(dataset) => {
                let duration = start.elapsed();
                // Rough footprint estimate: 8 bytes per f64 element.
                let memory_estimate = dataset.n_samples() * dataset.n_features() * 8;
                println!(
                    "  {} samples: {:.1}ms (~{:.1} MB)",
                    size,
                    duration.as_millis(),
                    memory_estimate as f64 / (1024.0 * 1024.0)
                );
            }
            Err(e) => {
                println!("  {size} samples: Failed - {e}");
            }
        }
    }

    println!();
    Ok(())
}

#[allow(dead_code)]
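/// Walks through three production-style scenarios: bulk generation, rapid
/// prototyping, and batch request processing.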
fn demonstrate_real_world_scenarios() -> Result<(), Box<dyn std::error::Error>> {
    println!("🌍 REAL-WORLD GPU SCENARIOS");
    println!("{}", "-".repeat(40));

    println!("Scenario 1: Large-scale synthetic data generation");
    demonstrate_large_scale_generation()?;

    println!("\nScenario 2: Rapid prototyping workflow");
    demonstrate_rapid_prototyping()?;

    println!("\nScenario 3: Batch dataset processing");
    demonstrate_batch_processing()?;

    Ok(())
}

#[allow(dead_code)]
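/// Generates a one-million-sample dataset of each kind (classification,
/// regression, clustering) and reports per-task and overall throughput.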
fn demonstrate_large_scale_generation() -> Result<(), Box<dyn std::error::Error>> {
    println!("  🎯 Goal: Generate 1M samples across multiple datasets");
    println!("  🚀 Using GPU acceleration for maximum throughput");

    let total_samples = 1_000_000;
    let features = 100;

    let mut generation_times = Vec::new();
    let start_total = Instant::now();

    let start = Instant::now();
    let classification =
        make_classification_auto_gpu(total_samples, features, 10, 2, 50, Some(42))?;
    let class_time = start.elapsed();
    generation_times.push(("Classification", class_time, classification.n_samples()));

    let start = Instant::now();
    let regression = make_regression_auto_gpu(total_samples, features, 60, 0.1, Some(43))?;
    let reg_time = start.elapsed();
    generation_times.push(("Regression", reg_time, regression.n_samples()));

    let start = Instant::now();
    let clustering = make_blobs_auto_gpu(total_samples, 50, 20, 1.5, Some(44))?;
    let cluster_time = start.elapsed();
    generation_times.push(("Clustering", cluster_time, clustering.n_samples()));

    let total_time = start_total.elapsed();

    println!("  ✅ Generation Results:");
    for (name, time, samples) in generation_times {
        let throughput = samples as f64 / time.as_secs_f64();
        println!(
            "    {}: {:.1}s ({:.1}K samples/s)",
            name,
            time.as_secs_f64(),
            throughput / 1000.0
        );
    }

    let total_samples_generated =
        classification.n_samples() + regression.n_samples() + clustering.n_samples();
    let overall_throughput = total_samples_generated as f64 / total_time.as_secs_f64();

    println!(
        "  📊 Overall: {} samples in {:.1}s ({:.1}K samples/s)",
        total_samples_generated,
        total_time.as_secs_f64(),
        overall_throughput / 1000.0
    );

    Ok(())
}

#[allow(dead_code)]
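/// Sweeps three dataset configurations of increasing size and dimensionality,
/// reporting generation time, informative-feature density, and memory use.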
fn demonstrate_rapid_prototyping() -> Result<(), Box<dyn std::error::Error>> {
    println!("  🎯 Goal: Quickly test different dataset configurations");
    println!("  ⚡ Using GPU for instant feedback");

    let configurations = vec![
        ("Small Dense", 1_000, 20, 5),
        ("Medium Sparse", 10_000, 100, 20),
        ("Large High-Dim", 100_000, 500, 100),
    ];

    for (name, samples, features, informative) in configurations {
        let start = Instant::now();

        let dataset =
            make_classification_auto_gpu(samples, features, 5, 2, informative, Some(42))?;
        let duration = start.elapsed();

        // Rough footprint estimate: 8 bytes per f64 element.
        let memory_usage = dataset.n_samples() * dataset.n_features() * 8;
        let density = informative as f64 / features as f64;

        println!(
            "  {}: {} in {:.1}ms",
            name,
            format_number(dataset.n_samples()),
            duration.as_millis()
        );
        println!(
            "    Features: {} (density: {:.1}%)",
            features,
            density * 100.0
        );
        println!(
            "    Memory: {:.1} MB",
            memory_usage as f64 / (1024.0 * 1024.0)
        );
    }

    Ok(())
}

#[allow(dead_code)]
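/// Simulates a queue of user requests for different dataset types, serving
/// each with the matching `*_auto_gpu` generator and summarizing batch
/// throughput.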
fn demonstrate_batch_processing() -> Result<(), Box<dyn std::error::Error>> {
    println!("  🎯 Goal: Process a batch of dataset requests back to back");
    println!("  🔄 Simulating production workload");

    let requests = vec![
        ("User A - Classification", 5_000, 30, "classification"),
        ("User B - Regression", 8_000, 25, "regression"),
        ("User C - Clustering", 3_000, 15, "clustering"),
        ("User D - Classification", 12_000, 40, "classification"),
        ("User E - Regression", 6_000, 35, "regression"),
    ];

    let batch_start = Instant::now();
    let mut total_samples = 0;

    for (requestname, samples, features, dataset_type) in requests {
        let start = Instant::now();

        let dataset = match dataset_type {
            "classification" => {
                make_classification_auto_gpu(samples, features, 5, 2, features / 2, Some(42))?
            }
            "regression" => {
                make_regression_auto_gpu(samples, features, features / 2, 0.1, Some(42))?
            }
            "clustering" => make_blobs_auto_gpu(samples, features, 8, 1.0, Some(42))?,
            _ => unreachable!(),
        };

        let duration = start.elapsed();
        total_samples += dataset.n_samples();

        println!(
            "  {}: {} samples in {:.1}ms",
            requestname,
            dataset.n_samples(),
            duration.as_millis()
        );
    }

    let batch_duration = batch_start.elapsed();
    let batch_throughput = total_samples as f64 / batch_duration.as_secs_f64();

    println!("  📊 Batch Summary:");
    println!("    Total Requests: 5");
    println!("    Total Samples: {}", format_number(total_samples));
    println!("    Batch Time: {:.2}s", batch_duration.as_secs_f64());
    println!(
        "    Throughput: {:.1}K samples/s",
        batch_throughput / 1000.0
    );

    Ok(())
}

#[allow(dead_code)]
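/// Formats a sample count in human-readable form, e.g. 1_500_000 -> "1.5M"
/// and 2_500 -> "2.5K".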
fn format_number(n: usize) -> String {
    if n >= 1_000_000 {
        format!("{:.1}M", n as f64 / 1_000_000.0)
    } else if n >= 1_000 {
        format!("{:.1}K", n as f64 / 1_000.0)
    } else {
        n.to_string()
    }
}