make_regression_auto_gpu

Function make_regression_auto_gpu 

Source
pub fn make_regression_auto_gpu(
    n_samples: usize,
    n_features: usize,
    n_informative: usize,
    noise: f64,
    random_state: Option<u64>,
) -> Result<Dataset>
Expand description

Generate regression dataset with automatic GPU detection

Examples found in repository
examples/gpu_acceleration.rs (line 388)
// Large-scale demo: generates three 1M-sample datasets (classification,
// regression, clustering) back-to-back via the *_auto_gpu helpers, timing
// each generation and reporting per-dataset and overall throughput.
// NOTE(review): the *_auto_gpu functions come from the surrounding crate;
// presumably they fall back to CPU when no GPU is detected — confirm.
// NOTE(review): listing keeps rustdoc's embedded line numbers (368..423);
// the doc text above points at "line 388" within this numbering.
368fn demonstrate_large_scale_generation() -> Result<(), Box<dyn std::error::Error>> {
369    println!("  🎯 Goal: Generate 1M samples across multiple datasets");
370    println!("  📊 Using GPU acceleration for maximum throughput");
371
372    let total_samples = 1_000_000;
373    let features = 100;
374
375    // Track generation times
// Each entry: (label, elapsed wall-clock time, actual samples produced).
376    let mut generation_times = Vec::new();
377    let start_total = Instant::now();
378
379    // Classification dataset
// Distinct seeds (42/43/44) keep the three datasets statistically independent.
380    let start = Instant::now();
381    let classification =
382        make_classification_auto_gpu(total_samples, features, 10, 2, 50, Some(42))?;
383    let class_time = start.elapsed();
384    generation_times.push(("Classification", class_time, classification.n_samples()));
385
386    // Regression dataset
387    let start = Instant::now();
388    let regression = make_regression_auto_gpu(total_samples, features, 60, 0.1, Some(43))?;
389    let reg_time = start.elapsed();
390    generation_times.push(("Regression", reg_time, regression.n_samples()));
391
392    // Clustering dataset
393    let start = Instant::now();
394    let clustering = make_blobs_auto_gpu(total_samples, 50, 20, 1.5, Some(44))?;
395    let cluster_time = start.elapsed();
396    generation_times.push(("Clustering", cluster_time, clustering.n_samples()));
397
398    let total_time = start_total.elapsed();
399
400    println!("  ✅ Generation Results:");
// Per-dataset throughput: samples generated per wall-clock second.
401    for (name, time, samples) in generation_times {
402        let throughput = samples as f64 / time.as_secs_f64();
403        println!(
404            "    {}: {:.1}s ({:.1}K samples/s)",
405            name,
406            time.as_secs_f64(),
407            throughput / 1000.0
408        );
409    }
410
// Overall throughput is measured against start_total, so it includes any
// inter-dataset overhead, not just the sum of the three generation times.
411    let total_samples_generated =
412        classification.n_samples() + regression.n_samples() + clustering.n_samples();
413    let overall_throughput = total_samples_generated as f64 / total_time.as_secs_f64();
414
415    println!(
416        "  📈 Overall: {} samples in {:.1}s ({:.1}K samples/s)",
417        total_samples_generated,
418        total_time.as_secs_f64(),
419        overall_throughput / 1000.0
420    );
421
422    Ok(())
423}
424
425#[allow(dead_code)]
// Rapid-prototyping demo: generates three classification datasets of
// increasing size/dimensionality and prints, for each, the generation time,
// informative-feature density, and an estimated memory footprint.
// Fix: repaired mojibake "âš¡" — the UTF-8 bytes of "⚡" (U+26A1)
// mis-decoded as Latin-1 — in the printed banner below.
426fn demonstrate_rapid_prototyping() -> Result<(), Box<dyn std::error::Error>> {
427    println!("  🎯 Goal: Quickly test different dataset configurations");
428    println!("  ⚡ Using GPU for instant feedback");
429
// (label, n_samples, n_features, n_informative) per configuration.
430    let configurations = vec![
431        ("Small Dense", 1_000, 20, 5),
432        ("Medium Sparse", 10_000, 100, 20),
433        ("Large High-Dim", 100_000, 500, 100),
434    ];
435
436    for (name, samples, features, informative) in configurations {
437        let start = Instant::now();
438
439        let dataset = make_classification_auto_gpu(samples, features, 5, 2, informative, Some(42))?;
440        let duration = start.elapsed();
441
442        // Quick analysis
// 8 bytes per cell — presumably f64 storage; confirm against Dataset's dtype.
443        let memory_usage = dataset.n_samples() * dataset.n_features() * 8; // bytes
444        let density = informative as f64 / features as f64;
445
446        println!(
447            "    {}: {} in {:.1}ms",
448            name,
449            format_number(dataset.n_samples()),
450            duration.as_millis()
451        );
452        println!(
453            "      Features: {} (density: {:.1}%)",
454            features,
455            density * 100.0
456        );
457        println!(
458            "      Memory: {:.1} MB",
459            memory_usage as f64 / (1024.0 * 1024.0)
460        );
461    }
462
463    Ok(())
464}
465
466#[allow(dead_code)]
// Batch-processing demo: simulates five concurrent-style user requests, each
// dispatched to the matching generator by a dataset-type tag, reporting
// per-request latency and whole-batch throughput.
// Fix: renamed local `requestname` -> `request_name` (rustc non_snake_case lint).
// NOTE(review): the string tags ("classification"/"regression"/"clustering")
// are stringly-typed dispatch — an enum would make the match exhaustive and
// drop the unreachable!() arm.
467fn demonstrate_batch_processing() -> Result<(), Box<dyn std::error::Error>> {
468    println!("  🎯 Goal: Process multiple dataset requests in parallel");
469    println!("  🔄 Simulating production workload");
470
471    // Simulate batch requests
// (label, n_samples, n_features, dataset-type tag) per request.
472    let requests = vec![
473        ("User A - Classification", 5_000, 30, "classification"),
474        ("User B - Regression", 8_000, 25, "regression"),
475        ("User C - Clustering", 3_000, 15, "clustering"),
476        ("User D - Classification", 12_000, 40, "classification"),
477        ("User E - Regression", 6_000, 35, "regression"),
478    ];
479
480    let batch_start = Instant::now();
481    let mut total_samples = 0;
482
483    for (request_name, samples, features, dataset_type) in requests {
484        let start = Instant::now();
485
// Half the features are informative for classification/regression requests.
486        let dataset = match dataset_type {
487            "classification" => {
488                make_classification_auto_gpu(samples, features, 5, 2, features / 2, Some(42))?
489            }
490            "regression" => {
491                make_regression_auto_gpu(samples, features, features / 2, 0.1, Some(42))?
492            }
493            "clustering" => make_blobs_auto_gpu(samples, features, 8, 1.0, Some(42))?,
494            _ => unreachable!(),
495        };
496
497        let duration = start.elapsed();
498        total_samples += dataset.n_samples();
499
500        println!(
501            "    {}: {} samples in {:.1}ms",
502            request_name,
503            dataset.n_samples(),
504            duration.as_millis()
505        );
506    }
507
508    let batch_duration = batch_start.elapsed();
509    let batch_throughput = total_samples as f64 / batch_duration.as_secs_f64();
510
511    println!("  📊 Batch Summary:");
// NOTE(review): hard-coded count; keep in sync with `requests` above.
512    println!("    Total Requests: 5");
513    println!("    Total Samples: {}", format_number(total_samples));
514    println!("    Batch Time: {:.2}s", batch_duration.as_secs_f64());
515    println!(
516        "    Throughput: {:.1}K samples/s",
517        batch_throughput / 1000.0
518    );
519
520    Ok(())
521}