pub fn make_classification_auto_gpu(
n_samples: usize,
n_features: usize,
n_classes: usize,
n_clusters_per_class: usize,
n_informative: usize,
random_state: Option<u64>,
) -> Result<Dataset>
Generates a classification dataset with automatic GPU detection. This is one of the crate's convenience functions for GPU-accelerated data generation.
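A minimal sketch of a call (argument values are illustrative, not defaults; assumes the function is in scope and the caller returns a compatible Result):

// 1,000 samples, 20 features, 3 classes, 2 clusters per class,
// 10 informative features, and a fixed seed for reproducibility.
let dataset = make_classification_auto_gpu(1_000, 20, 3, 2, 10, Some(42))?;
assert_eq!(dataset.n_samples(), 1_000);
assert_eq!(dataset.n_features(), 20);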
Examples found in repository
examples/gpu_acceleration.rs (line 259)
fn demonstrate_cpu_gpu_comparison() -> Result<(), Box<dyn std::error::Error>> {
    let dataset_sizes = vec![10_000, 50_000, 100_000];

    println!(
        "{:<12} {:<15} {:<15} {:<10}",
        "Size", "CPU Time", "GPU Time", "Speedup"
    );
    println!("{}", "-".repeat(55));

    for &size in &dataset_sizes {
        // CPU benchmark
        let cpu_start = Instant::now();
        let _cpu_dataset = make_classification(size, 20, 5, 2, 15, Some(42))?;
        let cpu_time = cpu_start.elapsed();

        // GPU benchmark
        let gpu_start = Instant::now();
        let _gpu_dataset = make_classification_auto_gpu(size, 20, 5, 2, 15, Some(42))?;
        let gpu_time = gpu_start.elapsed();

        let speedup = cpu_time.as_secs_f64() / gpu_time.as_secs_f64();
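        // A speedup below 1.0x means the GPU path was slower at this size;
        // host-device transfer overhead often dominates small workloads.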

        println!(
            "{:<12} {:<15} {:<15} {:<10.1}x",
            size,
            format!("{:.1}ms", cpu_time.as_secs_f64() * 1000.0),
            format!("{:.1}ms", gpu_time.as_secs_f64() * 1000.0),
            speedup
        );
    }

    Ok(())
}

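// Note: with automatic detection, the *_auto_gpu helpers presumably fall back to a
// CPU implementation when no compatible device is found, so speedups near 1.0x on
// CPU-only machines are expected rather than a bug (an assumption based on the
// naming; consult the crate docs for the exact fallback behavior).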
#[allow(dead_code)]
fn demonstrate_memory_management() -> Result<(), Box<dyn std::error::Error>> {
    println!("💾 GPU MEMORY MANAGEMENT");
    println!("{}", "-".repeat(40));

    // Configure memory-constrained GPU context
    let memory_config = GpuMemoryConfig {
        max_memory_mb: Some(512),  // Limit to 512MB
        pool_size_mb: 256,         // 256MB pool
        enable_coalescing: true,   // Enable memory coalescing
        use_unified_memory: false, // Don't use unified memory
    };
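    // Background: memory coalescing merges adjacent device-memory accesses into
    // fewer transactions; unified memory would expose a single address space shared
    // by host and device at the cost of driver-managed page migration.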

    let gpu_config = GpuConfig {
        backend: get_optimal_gpu_config().backend,
        memory: memory_config,
        threads_per_block: 256,
        ..Default::default()
    };

    println!("Memory Configuration:");
    println!(
        "  Max Memory: {} MB",
        gpu_config.memory.max_memory_mb.unwrap_or(0)
    );
    println!("  Pool Size: {} MB", gpu_config.memory.pool_size_mb);
    println!("  Coalescing: {}", gpu_config.memory.enable_coalescing);
    println!("  Unified Memory: {}", gpu_config.memory.use_unified_memory);

    let context = GpuContext::new(gpu_config)?;
    let device_info = context.device_info();

    println!("\nDevice Memory Info:");
    println!("  Total: {} MB", device_info.total_memory_mb);
    println!("  Available: {} MB", device_info.available_memory_mb);
    println!(
        "  Utilization: {:.1}%",
        (device_info.total_memory_mb - device_info.available_memory_mb) as f64
            / device_info.total_memory_mb as f64
            * 100.0
    );

    // Test memory-efficient generation
    println!("\nTesting memory-efficient dataset generation...");

    let sizes = vec![10_000, 25_000, 50_000];
    for &size in &sizes {
        let start = Instant::now();

        match context.make_regression_gpu(size, 50, 30, 0.1, Some(42)) {
            Ok(dataset) => {
                let duration = start.elapsed();
                let memory_estimate = dataset.n_samples() * dataset.n_features() * 8; // 8 bytes per f64
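                // This estimate covers only the feature matrix; f64 targets would
                // add roughly n_samples * 8 additional bytes.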

                println!(
                    "    {} samples: {:.1}ms (~{:.1} MB)",
                    size,
                    duration.as_secs_f64() * 1000.0,
                    memory_estimate as f64 / (1024.0 * 1024.0)
                );
            }
            Err(e) => {
                println!("    {size} samples: Failed - {e}");
            }
        }
    }

    println!();
    Ok(())
}

#[allow(dead_code)]
fn demonstrate_real_world_scenarios() -> Result<(), Box<dyn std::error::Error>> {
    println!("🌍 REAL-WORLD GPU SCENARIOS");
    println!("{}", "-".repeat(40));

    // Scenario 1: Large-scale data augmentation
    println!("Scenario 1: Large-scale synthetic data generation");
    demonstrate_large_scale_generation()?;

    // Scenario 2: Rapid prototyping with GPU
    println!("\nScenario 2: Rapid prototyping workflow");
    demonstrate_rapid_prototyping()?;

    // Scenario 3: Batch processing
    println!("\nScenario 3: Batch dataset processing");
    demonstrate_batch_processing()?;

    Ok(())
}

#[allow(dead_code)]
fn demonstrate_large_scale_generation() -> Result<(), Box<dyn std::error::Error>> {
    println!("  🎯 Goal: Generate 1M samples across multiple datasets");
    println!("  📊 Using GPU acceleration for maximum throughput");

    let total_samples = 1_000_000;
    let features = 100;

    // Track generation times
    let mut generation_times = Vec::new();
    let start_total = Instant::now();

    // Classification dataset
    let start = Instant::now();
    let classification =
        make_classification_auto_gpu(total_samples, features, 10, 2, 50, Some(42))?;
    let class_time = start.elapsed();
    generation_times.push(("Classification", class_time, classification.n_samples()));

    // Regression dataset
    let start = Instant::now();
    let regression = make_regression_auto_gpu(total_samples, features, 60, 0.1, Some(43))?;
    let reg_time = start.elapsed();
    generation_times.push(("Regression", reg_time, regression.n_samples()));

    // Clustering dataset
    let start = Instant::now();
    let clustering = make_blobs_auto_gpu(total_samples, 50, 20, 1.5, Some(44))?;
    let cluster_time = start.elapsed();
    generation_times.push(("Clustering", cluster_time, clustering.n_samples()));

    let total_time = start_total.elapsed();

    println!("  ✅ Generation Results:");
    for (name, time, samples) in generation_times {
        let throughput = samples as f64 / time.as_secs_f64();
        println!(
            "    {}: {:.1}s ({:.1}K samples/s)",
            name,
            time.as_secs_f64(),
            throughput / 1000.0
        );
    }

    let total_samples_generated =
        classification.n_samples() + regression.n_samples() + clustering.n_samples();
    let overall_throughput = total_samples_generated as f64 / total_time.as_secs_f64();

    println!(
        "  📈 Overall: {} samples in {:.1}s ({:.1}K samples/s)",
        total_samples_generated,
        total_time.as_secs_f64(),
        overall_throughput / 1000.0
    );

    Ok(())
}

#[allow(dead_code)]
fn demonstrate_rapid_prototyping() -> Result<(), Box<dyn std::error::Error>> {
    println!("  🎯 Goal: Quickly test different dataset configurations");
    println!("  ⚡ Using GPU for instant feedback");

    let configurations = vec![
        ("Small Dense", 1_000, 20, 5),
        ("Medium Sparse", 10_000, 100, 20),
        ("Large High-Dim", 100_000, 500, 100),
    ];

    for (name, samples, features, informative) in configurations {
        let start = Instant::now();

        let dataset = make_classification_auto_gpu(samples, features, 5, 2, informative, Some(42))?;
        let duration = start.elapsed();

        // Quick analysis
        let memory_usage = dataset.n_samples() * dataset.n_features() * 8; // bytes
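        // "Density" below is the fraction of features that are informative.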
        let density = informative as f64 / features as f64;

        println!(
            "    {}: {} in {:.1}ms",
            name,
            format_number(dataset.n_samples()),
            duration.as_secs_f64() * 1000.0
        );
        println!(
            "      Features: {} (density: {:.1}%)",
            features,
            density * 100.0
        );
        println!(
            "      Memory: {:.1} MB",
            memory_usage as f64 / (1024.0 * 1024.0)
        );
    }

    Ok(())
}

#[allow(dead_code)]
fn demonstrate_batch_processing() -> Result<(), Box<dyn std::error::Error>> {
    println!("  🎯 Goal: Process multiple dataset requests in parallel");
    println!("  🔄 Simulating production workload");

    // Simulate batch requests
    let requests = vec![
        ("User A - Classification", 5_000, 30, "classification"),
        ("User B - Regression", 8_000, 25, "regression"),
        ("User C - Clustering", 3_000, 15, "clustering"),
        ("User D - Classification", 12_000, 40, "classification"),
        ("User E - Regression", 6_000, 35, "regression"),
    ];
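    // The requests are handled sequentially in this demo; truly parallel batch
    // processing would require e.g. rayon tasks or multiple GPU streams.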

    let batch_start = Instant::now();
    let mut total_samples = 0;

    for (request_name, samples, features, dataset_type) in requests {
        let start = Instant::now();

        let dataset = match dataset_type {
            "classification" => {
                make_classification_auto_gpu(samples, features, 5, 2, features / 2, Some(42))?
            }
            "regression" => {
                make_regression_auto_gpu(samples, features, features / 2, 0.1, Some(42))?
            }
            "clustering" => make_blobs_auto_gpu(samples, features, 8, 1.0, Some(42))?,
            _ => unreachable!(),
        };

        let duration = start.elapsed();
        total_samples += dataset.n_samples();

        println!(
            "    {}: {} samples in {:.1}ms",
            request_name,
            dataset.n_samples(),
            duration.as_secs_f64() * 1000.0
        );
    }

    let batch_duration = batch_start.elapsed();
    let batch_throughput = total_samples as f64 / batch_duration.as_secs_f64();

    println!("  📊 Batch Summary:");
    println!("    Total Requests: 5");
    println!("    Total Samples: {}", format_number(total_samples));
    println!("    Batch Time: {:.2}s", batch_duration.as_secs_f64());
    println!(
        "    Throughput: {:.1}K samples/s",
        batch_throughput / 1000.0
    );

    Ok(())
}