pub fn make_blobs_auto_gpu(
n_samples: usize,
n_features: usize,
n_centers: usize,
cluster_std: f64,
random_state: Option<u64>,
) -> Result<Dataset>
Expand description
Generate blobs dataset with automatic GPU detection
Examples found in repository
examples/gpu_acceleration.rs (line 394)
368fn demonstrate_large_scale_generation() -> Result<(), Box<dyn std::error::Error>> {
369 println!(" 🎯 Goal: Generate 1M samples across multiple datasets");
370 println!(" 📊 Using GPU acceleration for maximum throughput");
371
372 let total_samples = 1_000_000;
373 let features = 100;
374
375 // Track generation times
376 let mut generation_times = Vec::new();
377 let start_total = Instant::now();
378
379 // Classification dataset
380 let start = Instant::now();
381 let classification =
382 make_classification_auto_gpu(total_samples, features, 10, 2, 50, Some(42))?;
383 let class_time = start.elapsed();
384 generation_times.push(("Classification", class_time, classification.n_samples()));
385
386 // Regression dataset
387 let start = Instant::now();
388 let regression = make_regression_auto_gpu(total_samples, features, 60, 0.1, Some(43))?;
389 let reg_time = start.elapsed();
390 generation_times.push(("Regression", reg_time, regression.n_samples()));
391
392 // Clustering dataset
393 let start = Instant::now();
394 let clustering = make_blobs_auto_gpu(total_samples, 50, 20, 1.5, Some(44))?;
395 let cluster_time = start.elapsed();
396 generation_times.push(("Clustering", cluster_time, clustering.n_samples()));
397
398 let total_time = start_total.elapsed();
399
400 println!(" ✅ Generation Results:");
401 for (name, time, samples) in generation_times {
402 let throughput = samples as f64 / time.as_secs_f64();
403 println!(
404 " {}: {:.1}s ({:.1}K samples/s)",
405 name,
406 time.as_secs_f64(),
407 throughput / 1000.0
408 );
409 }
410
411 let total_samples_generated =
412 classification.n_samples() + regression.n_samples() + clustering.n_samples();
413 let overall_throughput = total_samples_generated as f64 / total_time.as_secs_f64();
414
415 println!(
416 " 📈 Overall: {} samples in {:.1}s ({:.1}K samples/s)",
417 total_samples_generated,
418 total_time.as_secs_f64(),
419 overall_throughput / 1000.0
420 );
421
422 Ok(())
423}
424
425#[allow(dead_code)]
426fn demonstrate_rapid_prototyping() -> Result<(), Box<dyn std::error::Error>> {
427 println!(" 🎯 Goal: Quickly test different dataset configurations");
428 println!(" âš¡ Using GPU for instant feedback");
429
430 let configurations = vec![
431 ("Small Dense", 1_000, 20, 5),
432 ("Medium Sparse", 10_000, 100, 20),
433 ("Large High-Dim", 100_000, 500, 100),
434 ];
435
436 for (name, samples, features, informative) in configurations {
437 let start = Instant::now();
438
439 let dataset = make_classification_auto_gpu(samples, features, 5, 2, informative, Some(42))?;
440 let duration = start.elapsed();
441
442 // Quick analysis
443 let memory_usage = dataset.n_samples() * dataset.n_features() * 8; // bytes
444 let density = informative as f64 / features as f64;
445
446 println!(
447 " {}: {} in {:.1}ms",
448 name,
449 format_number(dataset.n_samples()),
450 duration.as_millis()
451 );
452 println!(
453 " Features: {} (density: {:.1}%)",
454 features,
455 density * 100.0
456 );
457 println!(
458 " Memory: {:.1} MB",
459 memory_usage as f64 / (1024.0 * 1024.0)
460 );
461 }
462
463 Ok(())
464}
465
466#[allow(dead_code)]
467fn demonstrate_batch_processing() -> Result<(), Box<dyn std::error::Error>> {
468 println!(" 🎯 Goal: Process multiple dataset requests in parallel");
469 println!(" 🔄 Simulating production workload");
470
471 // Simulate batch requests
472 let requests = vec![
473 ("User A - Classification", 5_000, 30, "classification"),
474 ("User B - Regression", 8_000, 25, "regression"),
475 ("User C - Clustering", 3_000, 15, "clustering"),
476 ("User D - Classification", 12_000, 40, "classification"),
477 ("User E - Regression", 6_000, 35, "regression"),
478 ];
479
480 let batch_start = Instant::now();
481 let mut total_samples = 0;
482
483 for (requestname, samples, features, dataset_type) in requests {
484 let start = Instant::now();
485
486 let dataset = match dataset_type {
487 "classification" => {
488 make_classification_auto_gpu(samples, features, 5, 2, features / 2, Some(42))?
489 }
490 "regression" => {
491 make_regression_auto_gpu(samples, features, features / 2, 0.1, Some(42))?
492 }
493 "clustering" => make_blobs_auto_gpu(samples, features, 8, 1.0, Some(42))?,
494 _ => unreachable!(),
495 };
496
497 let duration = start.elapsed();
498 total_samples += dataset.n_samples();
499
500 println!(
501 " {}: {} samples in {:.1}ms",
502 requestname,
503 dataset.n_samples(),
504 duration.as_millis()
505 );
506 }
507
508 let batch_duration = batch_start.elapsed();
509 let batch_throughput = total_samples as f64 / batch_duration.as_secs_f64();
510
511 println!(" 📊 Batch Summary:");
512 println!(" Total Requests: 5");
513 println!(" Total Samples: {}", format_number(total_samples));
514 println!(" Batch Time: {:.2}s", batch_duration.as_secs_f64());
515 println!(
516 " Throughput: {:.1}K samples/s",
517 batch_throughput / 1000.0
518 );
519
520 Ok(())
521}