Function get_optimal_gpu_config

pub fn get_optimal_gpu_config() -> GpuConfig

Get the optimal GPU configuration for the current system.
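
A minimal usage sketch follows; the `use` path is an assumption (adjust it to the crate's actual module layout), and the `GpuBackend` variants are those shown in the repository example below.

use scirs2_datasets::gpu::{get_optimal_gpu_config, GpuBackend};

fn main() {
    // Probe the system once and reuse the returned GpuConfig.
    let config = get_optimal_gpu_config();

    // The selected backend reflects what was detected at runtime; the
    // example below suggests CUDA or OpenCL when available, with a CPU
    // fallback otherwise.
    match config.backend {
        GpuBackend::Cuda { device_id } => println!("CUDA device {device_id}"),
        GpuBackend::OpenCl { platform_id, device_id } => {
            println!("OpenCL platform {platform_id}, device {device_id}")
        }
        GpuBackend::Cpu => println!("no GPU detected; using CPU fallback"),
    }
}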

Examples found in repository
examples/gpu_acceleration.rs (line 68)
45fn demonstrate_gpu_detection() {
46    println!("🔍 GPU DETECTION AND AVAILABILITY");
47    println!("{}", "-".repeat(40));
48
49    println!("CUDA Support:");
50    if is_cuda_available() {
51        println!("  ✅ CUDA is available");
52        println!("  🎯 NVIDIA GPU acceleration supported");
53    } else {
54        println!("  ❌ CUDA not available");
55        println!("  💡 Install CUDA toolkit for NVIDIA GPU support");
56    }
57
58    println!("\nOpenCL Support:");
59    if is_opencl_available() {
60        println!("  ✅ OpenCL is available");
61        println!("  🎯 Multi-vendor GPU acceleration supported");
62    } else {
63        println!("  ❌ OpenCL not available");
64        println!("  💡 Install OpenCL runtime for GPU support");
65    }
66
67    // Get optimal configuration
68    let optimal_config = get_optimal_gpu_config();
69    println!("\nOptimal Configuration:");
70    match optimal_config.backend {
71        GpuBackend::Cuda { device_id } => {
72            println!("  🚀 CUDA backend (device {device_id})");
73        }
74        GpuBackend::OpenCl {
75            platform_id,
76            device_id,
77        } => {
78            println!("  🚀 OpenCL backend (platform {platform_id}, device {device_id})");
79        }
80        GpuBackend::Cpu => {
81            println!("  💻 CPU fallback (no GPU available)");
82        }
83    }
84    println!(
85        "  ๐Ÿงต Threads per block: {}",
86        optimal_config.threads_per_block
87    );
88    println!(
89        "  ๐Ÿ”ข Double precision: {}",
90        optimal_config.enable_double_precision
91    );
92
93    println!();
94}
95
96#[allow(dead_code)]
97fn demonstrate_device_listing() -> Result<(), Box<dyn std::error::Error>> {
98    println!("📋 AVAILABLE GPU DEVICES");
99    println!("{}", "-".repeat(40));
100
101    let devices = list_gpu_devices()?;
102
103    if devices.is_empty() {
104        println!("No GPU devices found. Using CPU fallback.");
105    } else {
106        println!("Found {} device(s):", devices.len());
107
108        for (i, device) in devices.iter().enumerate() {
109            println!("\nDevice {i}:");
110            println!("  Name: {}", device.name);
111            println!("  Total Memory: {} MB", device.total_memory_mb);
112            println!("  Available Memory: {} MB", device.available_memory_mb);
113            println!("  Compute Units: {}", device.compute_units);
114            println!("  Max Work Group: {}", device.max_work_group_size);
115            println!("  Compute Capability: {}", device.compute_capability);
116            println!(
117                "  Double Precision: {}",
118                if device.supports_double_precision {
119                    "✅"
120                } else {
121                    "❌"
122                }
123            );
124
125            // Calculate utilization
126            let utilization = (device.total_memory_mb - device.available_memory_mb) as f64
127                / device.total_memory_mb as f64
128                * 100.0;
129            println!("  Memory Utilization: {utilization:.1}%");
130        }
131    }
132
133    println!();
134    Ok(())
135}
136
137#[allow(dead_code)]
138fn demonstrate_backend_comparison() -> Result<(), Box<dyn std::error::Error>> {
139    println!("⚡ GPU BACKEND COMPARISON");
140    println!("{}", "-".repeat(40));
141
142    let test_size = 50_000;
143    let features = 20;
144
145    println!("Comparing backends for {test_size} samples with {features} features:");
146
147    // Test different backends
148    let backends = vec![
149        ("CPU Fallback", GpuBackend::Cpu),
150        ("CUDA", GpuBackend::Cuda { device_id: 0 }),
151        (
152            "OpenCL",
153            GpuBackend::OpenCl {
154                platform_id: 0,
155                device_id: 0,
156            },
157        ),
158    ];
159
160    let mut results: HashMap<String, std::time::Duration> = HashMap::new();
161
162    for (name, backend) in backends {
163        println!("\nTesting {name}:");
164
165        let config = GpuConfig {
166            backend: backend.clone(),
167            threads_per_block: 256,
168            enable_double_precision: true,
169            ..Default::default()
170        };
171
172        match GpuContext::new(config) {
173            Ok(context) => {
174                if context.is_available() {
175                    // Test classification generation
176                    let start = Instant::now();
177                    let dataset =
178                        context.make_classification_gpu(test_size, features, 5, 2, 15, Some(42))?;
179                    let duration = start.elapsed();
180
181                    results.insert(name.to_string(), duration);
182
183                    println!(
184                        "  ✅ Classification: {} samples in {:.2}ms",
185                        dataset.n_samples(),
186                        duration.as_secs_f64() * 1000.0
187                    );
188                    println!(
189                        "  📊 Throughput: {:.1} samples/s",
190                        dataset.n_samples() as f64 / duration.as_secs_f64()
191                    );
192                } else {
193                    println!("  ❌ Backend not available");
194                }
195            }
196            Err(e) => {
197                println!("  ❌ Error: {e}");
198            }
199        }
200    }
201
202    // Calculate speedups
203    if let Some(cpu_time) = results.get("CPU Fallback") {
204        println!("\nSpeedup Analysis:");
205        for (backend, gpu_time) in &results {
206            if backend != "CPU Fallback" {
207                let speedup = cpu_time.as_secs_f64() / gpu_time.as_secs_f64();
208                println!("  {backend}: {speedup:.1}x faster than CPU");
209            }
210        }
211    }
212
213    println!();
214    Ok(())
215}
216
217#[allow(dead_code)]
218fn demonstrate_performance_benchmarks() -> Result<(), Box<dyn std::error::Error>> {
219    println!("📊 PERFORMANCE BENCHMARKS");
220    println!("{}", "-".repeat(40));
221
222    let config = get_optimal_gpu_config();
223    let benchmark = GpuBenchmark::new(config)?;
224
225    println!("Running data generation benchmarks...");
226    let data_results = benchmark.benchmark_data_generation()?;
227    data_results.print_results();
228
229    println!("\nRunning matrix operation benchmarks...");
230    let matrix_results = benchmark.benchmark_matrix_operations()?;
231    matrix_results.print_results();
232
233    // Compare with CPU baseline
234    println!("\nCPU vs GPU Comparison:");
235    demonstrate_cpu_gpu_comparison()?;
236
237    println!();
238    Ok(())
239}
240
241#[allow(dead_code)]
242fn demonstrate_cpu_gpu_comparison() -> Result<(), Box<dyn std::error::Error>> {
243    let dataset_sizes = vec![10_000, 50_000, 100_000];
244
245    println!(
246        "{:<12} {:<15} {:<15} {:<10}",
247        "Size", "CPU Time", "GPU Time", "Speedup"
248    );
249    println!("{}", "-".repeat(55));
250
251    for &size in &dataset_sizes {
252        // CPU benchmark
253        let cpu_start = Instant::now();
254        let _cpu_dataset = make_classification(size, 20, 5, 2, 15, Some(42))?;
255        let cpu_time = cpu_start.elapsed();
256
257        // GPU benchmark
258        let gpu_start = Instant::now();
259        let _gpu_dataset = make_classification_auto_gpu(size, 20, 5, 2, 15, Some(42))?;
260        let gpu_time = gpu_start.elapsed();
261
262        let speedup = cpu_time.as_secs_f64() / gpu_time.as_secs_f64();
263
264        println!(
265            "{:<12} {:<15} {:<15} {:<10.1}x",
266            size,
267            format!("{:.1}ms", cpu_time.as_secs_f64() * 1000.0),
268            format!("{:.1}ms", gpu_time.as_secs_f64() * 1000.0),
269            speedup
270        );
271    }
272
273    Ok(())
274}
275
276#[allow(dead_code)]
277fn demonstrate_memory_management() -> Result<(), Box<dyn std::error::Error>> {
278    println!("💾 GPU MEMORY MANAGEMENT");
279    println!("{}", "-".repeat(40));
280
281    // Configure memory-constrained GPU context
282    let memory_config = GpuMemoryConfig {
283        max_memory_mb: Some(512),  // Limit to 512MB
284        pool_size_mb: 256,         // 256MB pool
285        enable_coalescing: true,   // Enable memory coalescing
286        use_unified_memory: false, // Don't use unified memory
287    };
288
289    let gpu_config = GpuConfig {
290        backend: get_optimal_gpu_config().backend,
291        memory: memory_config,
292        threads_per_block: 256,
293        ..Default::default()
294    };
295
296    println!("Memory Configuration:");
297    println!(
298        "  Max Memory: {} MB",
299        gpu_config.memory.max_memory_mb.unwrap_or(0)
300    );
301    println!("  Pool Size: {} MB", gpu_config.memory.pool_size_mb);
302    println!("  Coalescing: {}", gpu_config.memory.enable_coalescing);
303    println!("  Unified Memory: {}", gpu_config.memory.use_unified_memory);
304
305    let context = GpuContext::new(gpu_config)?;
306    let device_info = context.device_info();
307
308    println!("\nDevice Memory Info:");
309    println!("  Total: {} MB", device_info.total_memory_mb);
310    println!("  Available: {} MB", device_info.available_memory_mb);
311    println!(
312        "  Utilization: {:.1}%",
313        (device_info.total_memory_mb - device_info.available_memory_mb) as f64
314            / device_info.total_memory_mb as f64
315            * 100.0
316    );
317
318    // Test memory-efficient generation
319    println!("\nTesting memory-efficient dataset generation...");
320
321    let sizes = vec![10_000, 25_000, 50_000];
322    for &size in &sizes {
323        let start = Instant::now();
324
325        match context.make_regression_gpu(size, 50, 30, 0.1, Some(42)) {
326            Ok(dataset) => {
327                let duration = start.elapsed();
328                let memory_estimate = dataset.n_samples() * dataset.n_features() * 8; // 8 bytes per f64
329
330                println!(
331                    "  {} samples: {:.1}ms (~{:.1} MB)",
332                    size,
333                    duration.as_secs_f64() * 1000.0,
334                    memory_estimate as f64 / (1024.0 * 1024.0)
335                );
336            }
337            Err(e) => {
338                println!("  {size} samples: Failed - {e}");
339            }
340        }
341    }
342
343    println!();
344    Ok(())
345}