pub fn get_optimal_gpu_config() -> GpuConfig
Get the optimal GPU configuration for the current system.
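A minimal usage sketch, assuming only the GpuBackend, GpuConfig, and GpuContext items that appear in the repository example below; the helper name is illustrative and import paths are omitted because they depend on how the crate re-exports these items:

fn pick_and_report_backend() {
    // Probe the system and pick the best available backend (CUDA, OpenCL, or CPU).
    let config = get_optimal_gpu_config();

    // Inspect the chosen backend without consuming the config.
    match &config.backend {
        GpuBackend::Cuda { device_id } => println!("CUDA device {device_id}"),
        GpuBackend::OpenCl { platform_id, device_id } => {
            println!("OpenCL platform {platform_id}, device {device_id}")
        }
        GpuBackend::Cpu => println!("No GPU detected, using CPU fallback"),
    }
    println!("Threads per block: {}", config.threads_per_block);

    // The same config can then be handed to GpuContext::new, as in the example below.
    match GpuContext::new(config) {
        Ok(context) => println!("Context available: {}", context.is_available()),
        Err(e) => println!("Could not create GPU context: {e}"),
    }
}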
Examples found in repository
examples/gpu_acceleration.rs (line 68)
fn demonstrate_gpu_detection() {
    println!("GPU DETECTION AND AVAILABILITY");
    println!("{}", "-".repeat(40));

    println!("CUDA Support:");
    if is_cuda_available() {
        println!("  ✅ CUDA is available");
        println!("  🎯 NVIDIA GPU acceleration supported");
    } else {
        println!("  ❌ CUDA not available");
        println!("  💡 Install CUDA toolkit for NVIDIA GPU support");
    }

    println!("\nOpenCL Support:");
    if is_opencl_available() {
        println!("  ✅ OpenCL is available");
        println!("  🎯 Multi-vendor GPU acceleration supported");
    } else {
        println!("  ❌ OpenCL not available");
        println!("  💡 Install OpenCL runtime for GPU support");
    }

    // Get optimal configuration
    let optimal_config = get_optimal_gpu_config();
    println!("\nOptimal Configuration:");
    match optimal_config.backend {
        GpuBackend::Cuda { device_id } => {
            println!("  CUDA backend (device {device_id})");
        }
        GpuBackend::OpenCl {
            platform_id,
            device_id,
        } => {
            println!("  OpenCL backend (platform {platform_id}, device {device_id})");
        }
        GpuBackend::Cpu => {
            println!("  💻 CPU fallback (no GPU available)");
        }
    }
    println!(
        "  🧵 Threads per block: {}",
        optimal_config.threads_per_block
    );
    println!(
        "  🔢 Double precision: {}",
        optimal_config.enable_double_precision
    );

    println!();
}

#[allow(dead_code)]
fn demonstrate_device_listing() -> Result<(), Box<dyn std::error::Error>> {
    println!("AVAILABLE GPU DEVICES");
    println!("{}", "-".repeat(40));

    let devices = list_gpu_devices()?;

    if devices.is_empty() {
        println!("No GPU devices found. Using CPU fallback.");
    } else {
        println!("Found {} device(s):", devices.len());

        for (i, device) in devices.iter().enumerate() {
            println!("\nDevice {i}:");
            println!("  Name: {}", device.name);
            println!("  Total Memory: {} MB", device.total_memory_mb);
            println!("  Available Memory: {} MB", device.available_memory_mb);
            println!("  Compute Units: {}", device.compute_units);
            println!("  Max Work Group: {}", device.max_work_group_size);
            println!("  Compute Capability: {}", device.compute_capability);
            println!(
                "  Double Precision: {}",
                if device.supports_double_precision {
                    "✅"
                } else {
                    "❌"
                }
            );

            // Calculate memory utilization as a percentage of total device memory
            let utilization = (device.total_memory_mb - device.available_memory_mb) as f64
                / device.total_memory_mb as f64
                * 100.0;
            println!("  Memory Utilization: {utilization:.1}%");
        }
    }

    println!();
    Ok(())
}

#[allow(dead_code)]
fn demonstrate_backend_comparison() -> Result<(), Box<dyn std::error::Error>> {
    println!("⚡ GPU BACKEND COMPARISON");
    println!("{}", "-".repeat(40));

    let testsize = 50_000;
    let features = 20;

    println!("Comparing backends for {testsize} samples with {features} features:");

    // Test different backends
    let backends = vec![
        ("CPU Fallback", GpuBackend::Cpu),
        ("CUDA", GpuBackend::Cuda { device_id: 0 }),
        (
            "OpenCL",
            GpuBackend::OpenCl {
                platform_id: 0,
                device_id: 0,
            },
        ),
    ];

    let mut results: HashMap<String, std::time::Duration> = HashMap::new();

    for (name, backend) in backends {
        println!("\nTesting {name}:");

        let config = GpuConfig {
            backend: backend.clone(),
            threads_per_block: 256,
            enable_double_precision: true,
            ..Default::default()
        };

        match GpuContext::new(config) {
            Ok(context) => {
                if context.is_available() {
                    // Test classification generation
                    let start = Instant::now();
                    let dataset =
                        context.make_classification_gpu(testsize, features, 5, 2, 15, Some(42))?;
                    let duration = start.elapsed();

                    results.insert(name.to_string(), duration);

                    println!(
                        "  ✅ Classification: {} samples in {:.2}ms",
                        dataset.n_samples(),
                        duration.as_millis()
                    );
                    println!(
                        "  Throughput: {:.1} samples/s",
                        dataset.n_samples() as f64 / duration.as_secs_f64()
                    );
                } else {
                    println!("  ❌ Backend not available");
                }
            }
            Err(e) => {
                println!("  ❌ Error: {e}");
            }
        }
    }

    // Calculate speedups
    if let Some(cpu_time) = results.get("CPU Fallback") {
        println!("\nSpeedup Analysis:");
        for (backend, gpu_time) in &results {
            if backend != "CPU Fallback" {
                let speedup = cpu_time.as_secs_f64() / gpu_time.as_secs_f64();
                println!("  {backend}: {speedup:.1}x faster than CPU");
            }
        }
    }

    println!();
    Ok(())
}

#[allow(dead_code)]
fn demonstrate_performance_benchmarks() -> Result<(), Box<dyn std::error::Error>> {
    println!("PERFORMANCE BENCHMARKS");
    println!("{}", "-".repeat(40));

    let config = get_optimal_gpu_config();
    let benchmark = GpuBenchmark::new(config)?;

    println!("Running data generation benchmarks...");
    let data_results = benchmark.benchmark_data_generation()?;
    data_results.print_results();

    println!("\nRunning matrix operation benchmarks...");
    let matrix_results = benchmark.benchmark_matrix_operations()?;
    matrix_results.print_results();

    // Compare with CPU baseline
    println!("\nCPU vs GPU Comparison:");
    demonstrate_cpu_gpu_comparison()?;

    println!();
    Ok(())
}

#[allow(dead_code)]
fn demonstrate_cpu_gpu_comparison() -> Result<(), Box<dyn std::error::Error>> {
    let dataset_sizes = vec![10_000, 50_000, 100_000];

    println!(
        "{:<12} {:<15} {:<15} {:<10}",
        "Size", "CPU Time", "GPU Time", "Speedup"
    );
    println!("{}", "-".repeat(55));

    for &size in &dataset_sizes {
        // CPU benchmark
        let cpu_start = Instant::now();
        let _cpudataset = make_classification(size, 20, 5, 2, 15, Some(42))?;
        let cpu_time = cpu_start.elapsed();

        // GPU benchmark
        let gpu_start = Instant::now();
        let _gpudataset = make_classification_auto_gpu(size, 20, 5, 2, 15, Some(42))?;
        let gpu_time = gpu_start.elapsed();

        let speedup = cpu_time.as_secs_f64() / gpu_time.as_secs_f64();

        println!(
            "{:<12} {:<15} {:<15} {:<10.1}x",
            size,
            format!("{:.1}ms", cpu_time.as_millis()),
            format!("{:.1}ms", gpu_time.as_millis()),
            speedup
        );
    }

    Ok(())
}

#[allow(dead_code)]
fn demonstrate_memory_management() -> Result<(), Box<dyn std::error::Error>> {
    println!("💾 GPU MEMORY MANAGEMENT");
    println!("{}", "-".repeat(40));

    // Configure memory-constrained GPU context
    let memory_config = GpuMemoryConfig {
        max_memory_mb: Some(512),  // Limit to 512 MB
        pool_size_mb: 256,         // 256 MB pool
        enable_coalescing: true,   // Enable memory coalescing
        use_unified_memory: false, // Don't use unified memory
    };

    let gpu_config = GpuConfig {
        backend: get_optimal_gpu_config().backend,
        memory: memory_config,
        threads_per_block: 256,
        ..Default::default()
    };

    println!("Memory Configuration:");
    println!(
        "  Max Memory: {} MB",
        gpu_config.memory.max_memory_mb.unwrap_or(0)
    );
    println!("  Pool Size: {} MB", gpu_config.memory.pool_size_mb);
    println!("  Coalescing: {}", gpu_config.memory.enable_coalescing);
    println!("  Unified Memory: {}", gpu_config.memory.use_unified_memory);

    let context = GpuContext::new(gpu_config)?;
    let device_info = context.device_info();

    println!("\nDevice Memory Info:");
    println!("  Total: {} MB", device_info.total_memory_mb);
    println!("  Available: {} MB", device_info.available_memory_mb);
    println!(
        "  Utilization: {:.1}%",
        (device_info.total_memory_mb - device_info.available_memory_mb) as f64
            / device_info.total_memory_mb as f64
            * 100.0
    );

    // Test memory-efficient generation
    println!("\nTesting memory-efficient dataset generation...");

    let sizes = vec![10_000, 25_000, 50_000];
    for &size in &sizes {
        let start = Instant::now();

        match context.make_regression_gpu(size, 50, 30, 0.1, Some(42)) {
            Ok(dataset) => {
                let duration = start.elapsed();
                let memory_estimate = dataset.n_samples() * dataset.n_features() * 8; // 8 bytes per f64

                println!(
                    "  {} samples: {:.1}ms (~{:.1} MB)",
                    size,
                    duration.as_millis(),
                    memory_estimate as f64 / (1024.0 * 1024.0)
                );
            }
            Err(e) => {
                println!("  {size} samples: Failed - {e}");
            }
        }
    }

    println!();
    Ok(())
}