// scirs2_datasets/generators/gpu.rs

1//! GPU-accelerated dataset generators
2
3use super::basic::{make_blobs, make_classification, make_regression};
4use super::config::GpuConfig;
5use crate::error::{DatasetsError, Result};
6use crate::gpu::{GpuContext, GpuDeviceInfo};
7use crate::utils::Dataset;
8use scirs2_core::ndarray::{Array1, Array2};
9use scirs2_core::random::prelude::*;
10use scirs2_core::random::rngs::StdRng;
11use scirs2_core::random::Distribution;
12// Use local GPU implementation instead of core to avoid feature flag issues
13use crate::gpu::GpuBackend as LocalGpuBackend;
14
15/// GPU-accelerated classification dataset generation
16#[allow(dead_code)]
17#[allow(clippy::too_many_arguments)]
18pub fn make_classification_gpu(
19    n_samples: usize,
20    n_features: usize,
21    n_classes: usize,
22    n_clusters_per_class: usize,
23    n_informative: usize,
24    randomseed: Option<u64>,
25    gpuconfig: GpuConfig,
26) -> Result<Dataset> {
27    // Check if GPU is available and requested
28    if gpuconfig.use_gpu && gpu_is_available() {
29        make_classification_gpu_impl(
30            n_samples,
31            n_features,
32            n_classes,
33            n_clusters_per_class,
34            n_informative,
35            randomseed,
36            gpuconfig,
37        )
38    } else {
39        // Fallback to CPU implementation
40        make_classification(
41            n_samples,
42            n_features,
43            n_classes,
44            n_clusters_per_class,
45            n_informative,
46            randomseed,
47        )
48    }
49}
50
51/// Internal GPU implementation for classification data generation
52#[allow(dead_code)]
53fn make_classification_gpu_impl(
54    n_samples: usize,
55    n_features: usize,
56    n_classes: usize,
57    n_clusters_per_class: usize,
58    n_informative: usize,
59    randomseed: Option<u64>,
60    gpuconfig: GpuConfig,
61) -> Result<Dataset> {
62    // Input validation
63    if n_samples == 0 || n_features == 0 || n_informative == 0 {
64        return Err(DatasetsError::InvalidFormat(
65            "n_samples, n_features, and n_informative must be > 0".to_string(),
66        ));
67    }
68
69    if n_features < n_informative {
70        return Err(DatasetsError::InvalidFormat(format!(
71            "n_features ({n_features}) must be >= n_informative ({n_informative})"
72        )));
73    }
74
75    if n_classes < 2 || n_clusters_per_class == 0 {
76        return Err(DatasetsError::InvalidFormat(
77            "n_classes must be >= 2 and n_clusters_per_class must be > 0".to_string(),
78        ));
79    }
80
81    // Create GPU context
82    let gpu_context = GpuContext::new(crate::gpu::GpuConfig {
83        backend: crate::gpu::GpuBackend::Cuda {
84            device_id: gpuconfig.device_id as u32,
85        },
86        memory: crate::gpu::GpuMemoryConfig::default(),
87        threads_per_block: 256,
88        enable_double_precision: !gpuconfig.use_single_precision,
89        use_fast_math: false,
90        random_seed: None,
91    })
92    .map_err(|e| DatasetsError::Other(format!("Failed to create GPU context: {e}")))?;
93
94    // Generate data in chunks to avoid memory issues
95    let chunk_size = std::cmp::min(gpuconfig.chunk_size, n_samples);
96    let num_chunks = n_samples.div_ceil(chunk_size);
97
98    let mut all_data = Vec::new();
99    let mut all_targets = Vec::new();
100
101    for chunk_idx in 0..num_chunks {
102        let start_idx = chunk_idx * chunk_size;
103        let end_idx = std::cmp::min(start_idx + chunk_size, n_samples);
104        let chunk_samples = end_idx - start_idx;
105
106        // Generate chunk on GPU
107        let (chunk_data, chunk_targets) = generate_classification_chunk_gpu(
108            &gpu_context,
109            chunk_samples,
110            n_features,
111            n_classes,
112            n_clusters_per_class,
113            n_informative,
114            randomseed.map(|s| s + chunk_idx as u64),
115            gpuconfig.use_single_precision,
116        )?;
117
118        all_data.extend(chunk_data);
119        all_targets.extend(chunk_targets);
120    }
121
122    // Convert to ndarray
123    let data = Array2::from_shape_vec((n_samples, n_features), all_data)
124        .map_err(|e| DatasetsError::Other(format!("Failed to create data array: {e}")))?;
125
126    let target = Array1::from_vec(all_targets);
127
128    // Create dataset
129    let mut dataset = Dataset::new(data, Some(target));
130
131    // Add metadata
132    let featurenames: Vec<String> = (0..n_features).map(|i| format!("feature_{i}")).collect();
133    let classnames: Vec<String> = (0..n_classes).map(|i| format!("class_{i}")).collect();
134
135    dataset = dataset
136        .with_featurenames(featurenames)
137        .with_targetnames(classnames)
138        .with_description(format!(
139            "GPU-accelerated synthetic classification dataset with {n_classes} _classes and {n_features} _features"
140        ));
141
142    Ok(dataset)
143}
144
145/// Generate a chunk of classification data on GPU
146#[allow(dead_code)]
147#[allow(clippy::too_many_arguments)]
148fn generate_classification_chunk_gpu(
149    gpu_context: &GpuContext,
150    n_samples: usize,
151    n_features: usize,
152    n_classes: usize,
153    n_clusters_per_class: usize,
154    n_informative: usize,
155    randomseed: Option<u64>,
156    _use_single_precision: bool,
157) -> Result<(Vec<f64>, Vec<f64>)> {
158    // For now, implement using GPU matrix operations
159    // In a real implementation, this would use custom GPU kernels
160
161    let _seed = randomseed.unwrap_or(42);
162    let mut rng = StdRng::seed_from_u64(_seed);
163
164    // Generate centroids
165    let n_centroids = n_classes * n_clusters_per_class;
166    let mut centroids = vec![0.0; n_centroids * n_informative];
167
168    for i in 0..n_centroids {
169        for j in 0..n_informative {
170            centroids[i * n_informative + j] = 2.0 * rng.gen_range(-1.0f64..1.0f64);
171        }
172    }
173
174    // Generate _samples using GPU-accelerated operations
175    let mut data = vec![0.0; n_samples * n_features];
176    let mut targets = vec![0.0; n_samples];
177
178    // Implement GPU buffer operations for accelerated data generation
179    if *gpu_context.backend() != LocalGpuBackend::Cpu {
180        return generate_classification_gpu_optimized(
181            gpu_context,
182            &centroids,
183            n_samples,
184            n_features,
185            n_classes,
186            n_clusters_per_class,
187            n_informative,
188            &mut rng,
189        );
190    }
191
192    // CPU fallback: Generate _samples in parallel chunks
193    let samples_per_class = n_samples / n_classes;
194    let remainder = n_samples % n_classes;
195
196    let mut sample_idx = 0;
197    for _class in 0..n_classes {
198        let n_samples_class = if _class < remainder {
199            samples_per_class + 1
200        } else {
201            samples_per_class
202        };
203
204        let samples_per_cluster = n_samples_class / n_clusters_per_class;
205        let cluster_remainder = n_samples_class % n_clusters_per_class;
206
207        for cluster in 0..n_clusters_per_class {
208            let n_samples_cluster = if cluster < cluster_remainder {
209                samples_per_cluster + 1
210            } else {
211                samples_per_cluster
212            };
213
214            let centroid_idx = _class * n_clusters_per_class + cluster;
215
216            for _ in 0..n_samples_cluster {
217                // Generate sample around centroid
218                for j in 0..n_informative {
219                    let centroid_val = centroids[centroid_idx * n_informative + j];
220                    let noise = scirs2_core::random::Normal::new(0.0, 0.3)
221                        .unwrap()
222                        .sample(&mut rng);
223                    data[sample_idx * n_features + j] = centroid_val + noise;
224                }
225
226                // Add noise _features
227                for j in n_informative..n_features {
228                    data[sample_idx * n_features + j] = scirs2_core::random::Normal::new(0.0, 1.0)
229                        .unwrap()
230                        .sample(&mut rng);
231                }
232
233                targets[sample_idx] = _class as f64;
234                sample_idx += 1;
235            }
236        }
237    }
238
239    Ok((data, targets))
240}
241
242/// GPU-optimized classification data generation using buffer operations
243#[allow(clippy::too_many_arguments)]
244#[allow(dead_code)]
245fn generate_classification_gpu_optimized(
246    _gpu_context: &GpuContext,
247    centroids: &[f64],
248    n_samples: usize,
249    n_features: usize,
250    n_classes: usize,
251    n_clusters_per_class: usize,
252    n_informative: usize,
253    rng: &mut StdRng,
254) -> Result<(Vec<f64>, Vec<f64>)> {
255    // For now, use CPU-based implementation since core GPU _features are not available
256    // TODO: Implement proper GPU acceleration when core GPU _features are stabilized
257
258    // CPU fallback implementation since GPU _features are not available
259    use scirs2_core::random::Distribution;
260    let normal = scirs2_core::random::Normal::new(0.0, 1.0).unwrap();
261
262    let mut data = vec![0.0; n_samples * n_features];
263    let mut targets = vec![0.0; n_samples];
264
265    // Samples per _class
266    let samples_per_class = n_samples / n_classes;
267    let remainder = n_samples % n_classes;
268
269    let mut sample_idx = 0;
270
271    for _class in 0..n_classes {
272        let n_samples_class = if _class < remainder {
273            samples_per_class + 1
274        } else {
275            samples_per_class
276        };
277
278        // Samples per cluster within this _class
279        let samples_per_cluster = n_samples_class / n_clusters_per_class;
280        let cluster_remainder = n_samples_class % n_clusters_per_class;
281
282        for cluster in 0..n_clusters_per_class {
283            let n_samples_cluster = if cluster < cluster_remainder {
284                samples_per_cluster + 1
285            } else {
286                samples_per_cluster
287            };
288
289            let centroid_idx = _class * n_clusters_per_class + cluster;
290
291            for _ in 0..n_samples_cluster {
292                // Generate _informative _features around cluster centroid
293                for j in 0..n_informative {
294                    let centroid_val = centroids[centroid_idx * n_informative + j];
295                    data[sample_idx * n_features + j] = centroid_val + 0.3 * normal.sample(rng);
296                }
297
298                // Generate noise _features
299                for j in n_informative..n_features {
300                    data[sample_idx * n_features + j] = normal.sample(rng);
301                }
302
303                targets[sample_idx] = _class as f64;
304                sample_idx += 1;
305            }
306        }
307    }
308
309    // TODO: Future GPU implementation placeholder - currently using CPU fallback
310
311    Ok((data, targets))
312}
313
314/// GPU-accelerated regression dataset generation
315#[allow(dead_code)]
316#[allow(clippy::too_many_arguments)]
317pub fn make_regression_gpu(
318    n_samples: usize,
319    n_features: usize,
320    n_informative: usize,
321    noise: f64,
322    randomseed: Option<u64>,
323    gpuconfig: GpuConfig,
324) -> Result<Dataset> {
325    // Check if GPU is available and requested
326    if gpuconfig.use_gpu && gpu_is_available() {
327        make_regression_gpu_impl(
328            n_samples,
329            n_features,
330            n_informative,
331            noise,
332            randomseed,
333            gpuconfig,
334        )
335    } else {
336        // Fallback to CPU implementation
337        make_regression(n_samples, n_features, n_informative, noise, randomseed)
338    }
339}
340
341/// Internal GPU implementation for regression data generation
342#[allow(dead_code)]
343fn make_regression_gpu_impl(
344    n_samples: usize,
345    n_features: usize,
346    n_informative: usize,
347    noise: f64,
348    randomseed: Option<u64>,
349    gpuconfig: GpuConfig,
350) -> Result<Dataset> {
351    // Input validation
352    if n_samples == 0 || n_features == 0 || n_informative == 0 {
353        return Err(DatasetsError::InvalidFormat(
354            "n_samples, n_features, and n_informative must be > 0".to_string(),
355        ));
356    }
357
358    if n_features < n_informative {
359        return Err(DatasetsError::InvalidFormat(format!(
360            "n_features ({n_features}) must be >= n_informative ({n_informative})"
361        )));
362    }
363
364    // Create GPU context
365    let gpu_context = GpuContext::new(crate::gpu::GpuConfig {
366        backend: crate::gpu::GpuBackend::Cuda {
367            device_id: gpuconfig.device_id as u32,
368        },
369        memory: crate::gpu::GpuMemoryConfig::default(),
370        threads_per_block: 256,
371        enable_double_precision: !gpuconfig.use_single_precision,
372        use_fast_math: false,
373        random_seed: None,
374    })
375    .map_err(|e| DatasetsError::Other(format!("Failed to create GPU context: {e}")))?;
376
377    let _seed = randomseed.unwrap_or(42);
378    let mut rng = StdRng::seed_from_u64(_seed);
379
380    // Generate coefficient matrix on GPU
381    let mut coefficients = vec![0.0; n_informative];
382    for coeff in coefficients.iter_mut().take(n_informative) {
383        *coeff = rng.gen_range(-2.0f64..2.0f64);
384    }
385
386    // Generate data matrix in chunks
387    let chunk_size = std::cmp::min(gpuconfig.chunk_size, n_samples);
388    let num_chunks = n_samples.div_ceil(chunk_size);
389
390    let mut all_data = Vec::new();
391    let mut all_targets = Vec::new();
392
393    for chunk_idx in 0..num_chunks {
394        let start_idx = chunk_idx * chunk_size;
395        let end_idx = std::cmp::min(start_idx + chunk_size, n_samples);
396        let chunk_samples = end_idx - start_idx;
397
398        // Generate chunk on GPU
399        let (chunk_data, chunk_targets) = generate_regression_chunk_gpu(
400            &gpu_context,
401            chunk_samples,
402            n_features,
403            n_informative,
404            &coefficients,
405            noise,
406            randomseed.map(|s| s + chunk_idx as u64),
407        )?;
408
409        all_data.extend(chunk_data);
410        all_targets.extend(chunk_targets);
411    }
412
413    // Convert to ndarray
414    let data = Array2::from_shape_vec((n_samples, n_features), all_data)
415        .map_err(|e| DatasetsError::Other(format!("Failed to create data array: {e}")))?;
416
417    let target = Array1::from_vec(all_targets);
418
419    // Create dataset
420    let mut dataset = Dataset::new(data, Some(target));
421
422    // Add metadata
423    let featurenames: Vec<String> = (0..n_features).map(|i| format!("feature_{i}")).collect();
424
425    dataset = dataset
426        .with_featurenames(featurenames)
427        .with_description(format!(
428            "GPU-accelerated synthetic regression dataset with {n_features} _features"
429        ));
430
431    Ok(dataset)
432}
433
434/// Generate a chunk of regression data on GPU
435#[allow(dead_code)]
436fn generate_regression_chunk_gpu(
437    gpu_context: &GpuContext,
438    n_samples: usize,
439    n_features: usize,
440    n_informative: usize,
441    coefficients: &[f64],
442    noise: f64,
443    randomseed: Option<u64>,
444) -> Result<(Vec<f64>, Vec<f64>)> {
445    let _seed = randomseed.unwrap_or(42);
446    let mut rng = StdRng::seed_from_u64(_seed);
447
448    // Generate random data matrix
449    let mut data = vec![0.0; n_samples * n_features];
450    let normal = scirs2_core::random::Normal::new(0.0, 1.0).unwrap();
451
452    // Use GPU for matrix multiplication if available
453    for i in 0..n_samples {
454        for j in 0..n_features {
455            data[i * n_features + j] = normal.sample(&mut rng);
456        }
457    }
458
459    // Calculate targets using GPU matrix operations
460    let mut targets = vec![0.0; n_samples];
461    let noise_dist = scirs2_core::random::Normal::new(0.0, noise).unwrap();
462
463    // Create GPU buffers for accelerated matrix operations
464    if *gpu_context.backend() != LocalGpuBackend::Cpu {
465        return generate_regression_gpu_optimized(
466            gpu_context,
467            &data,
468            coefficients,
469            n_samples,
470            n_features,
471            n_informative,
472            noise,
473            &mut rng,
474        );
475    }
476
477    // CPU fallback: Matrix multiplication using nested loops
478    for i in 0..n_samples {
479        let mut target_val = 0.0;
480        for j in 0..n_informative {
481            target_val += data[i * n_features + j] * coefficients[j];
482        }
483
484        // Add noise
485        target_val += noise_dist.sample(&mut rng);
486        targets[i] = target_val;
487    }
488
489    Ok((data, targets))
490}
491
492/// GPU-optimized regression data generation using buffer operations and matrix multiplication
493#[allow(clippy::too_many_arguments)]
494#[allow(dead_code)]
495fn generate_regression_gpu_optimized(
496    _gpu_context: &GpuContext,
497    data: &[f64],
498    coefficients: &[f64],
499    n_samples: usize,
500    n_features: usize,
501    n_informative: usize,
502    noise: f64,
503    rng: &mut StdRng,
504) -> Result<(Vec<f64>, Vec<f64>)> {
505    // For now, use CPU-based implementation since core GPU _features are not available
506    // TODO: Implement proper GPU acceleration when core GPU _features are stabilized
507
508    // CPU fallback implementation since GPU _features are not available
509    use scirs2_core::random::Distribution;
510    let normal = scirs2_core::random::Normal::new(0.0, noise).unwrap();
511
512    let mut targets = vec![0.0; n_samples];
513
514    // Matrix multiplication for regression targets
515    for i in 0..n_samples {
516        let mut target = 0.0;
517        for j in 0..n_informative {
518            target += data[i * n_features + j] * coefficients[j];
519        }
520
521        // Add noise
522        target += normal.sample(rng);
523        targets[i] = target;
524    }
525
526    Ok((data.to_vec(), targets))
527}
528
529/// GPU-accelerated blob generation
530#[allow(dead_code)]
531pub fn make_blobs_gpu(
532    n_samples: usize,
533    n_features: usize,
534    n_centers: usize,
535    cluster_std: f64,
536    randomseed: Option<u64>,
537    gpuconfig: GpuConfig,
538) -> Result<Dataset> {
539    // Check if GPU is available and requested
540    if gpuconfig.use_gpu && gpu_is_available() {
541        make_blobs_gpu_impl(
542            n_samples,
543            n_features,
544            n_centers,
545            cluster_std,
546            randomseed,
547            gpuconfig,
548        )
549    } else {
550        // Fallback to CPU implementation
551        make_blobs(n_samples, n_features, n_centers, cluster_std, randomseed)
552    }
553}
554
555/// Internal GPU implementation for blob generation
556#[allow(dead_code)]
557fn make_blobs_gpu_impl(
558    n_samples: usize,
559    n_features: usize,
560    n_centers: usize,
561    cluster_std: f64,
562    randomseed: Option<u64>,
563    gpuconfig: GpuConfig,
564) -> Result<Dataset> {
565    // Input validation
566    if n_samples == 0 || n_features == 0 || n_centers == 0 {
567        return Err(DatasetsError::InvalidFormat(
568            "n_samples, n_features, and n_centers must be > 0".to_string(),
569        ));
570    }
571
572    if cluster_std <= 0.0 {
573        return Err(DatasetsError::InvalidFormat(
574            "cluster_std must be > 0".to_string(),
575        ));
576    }
577
578    // Create GPU context
579    let gpu_context = GpuContext::new(crate::gpu::GpuConfig {
580        backend: crate::gpu::GpuBackend::Cuda {
581            device_id: gpuconfig.device_id as u32,
582        },
583        memory: crate::gpu::GpuMemoryConfig::default(),
584        threads_per_block: 256,
585        enable_double_precision: !gpuconfig.use_single_precision,
586        use_fast_math: false,
587        random_seed: None,
588    })
589    .map_err(|e| DatasetsError::Other(format!("Failed to create GPU context: {e}")))?;
590
591    let _seed = randomseed.unwrap_or(42);
592    let mut rng = StdRng::seed_from_u64(_seed);
593
594    // Generate cluster _centers
595    let mut centers = Array2::zeros((n_centers, n_features));
596    let center_dist = scirs2_core::random::Normal::new(0.0, 10.0).unwrap();
597
598    for i in 0..n_centers {
599        for j in 0..n_features {
600            centers[[i, j]] = center_dist.sample(&mut rng);
601        }
602    }
603
604    // Generate _samples around _centers using GPU acceleration
605    let samples_per_center = n_samples / n_centers;
606    let remainder = n_samples % n_centers;
607
608    let mut data = Array2::zeros((n_samples, n_features));
609    let mut target = Array1::zeros(n_samples);
610
611    let mut sample_idx = 0;
612    let noise_dist = scirs2_core::random::Normal::new(0.0, cluster_std).unwrap();
613
614    for center_idx in 0..n_centers {
615        let n_samples_center = if center_idx < remainder {
616            samples_per_center + 1
617        } else {
618            samples_per_center
619        };
620
621        // Generate _samples for this center using GPU acceleration
622        if *gpu_context.backend() != LocalGpuBackend::Cpu {
623            // Use GPU kernel for parallel sample generation
624            let gpu_generated = generate_blobs_center_gpu(
625                &gpu_context,
626                &centers,
627                center_idx,
628                n_samples_center,
629                n_features,
630                cluster_std,
631                &mut rng,
632            )?;
633
634            // Copy GPU-generated data to main arrays
635            for (local_idx, sample) in gpu_generated.iter().enumerate() {
636                for j in 0..n_features {
637                    data[[sample_idx + local_idx, j]] = sample[j];
638                }
639                target[sample_idx + local_idx] = center_idx as f64;
640            }
641            sample_idx += n_samples_center;
642        } else {
643            // CPU fallback: generate sequentially
644            for _ in 0..n_samples_center {
645                for j in 0..n_features {
646                    data[[sample_idx, j]] = centers[[center_idx, j]] + noise_dist.sample(&mut rng);
647                }
648                target[sample_idx] = center_idx as f64;
649                sample_idx += 1;
650            }
651        }
652    }
653
654    // Create dataset
655    let mut dataset = Dataset::new(data, Some(target));
656
657    // Add metadata
658    let featurenames: Vec<String> = (0..n_features).map(|i| format!("feature_{i}")).collect();
659    let centernames: Vec<String> = (0..n_centers).map(|i| format!("center_{i}")).collect();
660
661    dataset = dataset
662        .with_featurenames(featurenames)
663        .with_targetnames(centernames)
664        .with_description(format!(
665            "GPU-accelerated synthetic blob dataset with {n_centers} _centers and {n_features} _features"
666        ));
667
668    Ok(dataset)
669}
670
671/// GPU-optimized blob center generation using parallel kernels
672#[allow(dead_code)]
673fn generate_blobs_center_gpu(
674    _gpu_context: &GpuContext,
675    centers: &Array2<f64>,
676    center_idx: usize,
677    n_samples_center: usize,
678    n_features: usize,
679    cluster_std: f64,
680    rng: &mut StdRng,
681) -> Result<Vec<Vec<f64>>> {
682    // For now, use CPU-based implementation since core GPU _features are not available
683    // TODO: Implement proper GPU acceleration when core GPU _features are stabilized
684
685    // Extract _center coordinates for this specific _center
686    let _center_coords: Vec<f64> = (0..n_features).map(|j| centers[[center_idx, j]]).collect();
687
688    // CPU fallback implementation since GPU _features are not available
689    use scirs2_core::random::Distribution;
690    let normal = scirs2_core::random::Normal::new(0.0, cluster_std).unwrap();
691
692    let mut result = Vec::with_capacity(n_samples_center);
693
694    for _ in 0..n_samples_center {
695        let mut sample = Vec::with_capacity(n_features);
696        for j in 0..n_features {
697            let center_val = centers[[center_idx, j]];
698            let noise = normal.sample(rng);
699            sample.push(center_val + noise);
700        }
701        result.push(sample);
702    }
703
704    Ok(result)
705}
706
707/// Check if GPU is available for acceleration
708#[allow(dead_code)]
709pub fn gpu_is_available() -> bool {
710    // Try to create a GPU context to check availability
711    GpuContext::new(crate::gpu::GpuConfig::default()).is_ok()
712}
713
714/// Get GPU device information
715#[allow(dead_code)]
716pub fn get_gpu_info() -> Result<Vec<GpuDeviceInfo>> {
717    crate::gpu::list_gpu_devices()
718        .map_err(|e| DatasetsError::Other(format!("Failed to get GPU info: {e}")))
719}
720
721/// Benchmark GPU vs CPU performance for data generation
722#[allow(dead_code)]
723pub fn benchmark_gpu_vs_cpu(
724    n_samples: usize,
725    n_features: usize,
726    iterations: usize,
727) -> Result<(f64, f64)> {
728    use std::time::Instant;
729
730    // Benchmark CPU implementation
731    let cpu_start = Instant::now();
732    for _ in 0..iterations {
733        let _result = make_classification(n_samples, n_features, 3, 2, n_features, Some(42))?;
734    }
735    let cpu_time = cpu_start.elapsed().as_secs_f64() / iterations as f64;
736
737    // Benchmark GPU implementation
738    let gpuconfig = GpuConfig::default();
739    let gpu_start = Instant::now();
740    for _ in 0..iterations {
741        let _result = make_classification_gpu(
742            n_samples,
743            n_features,
744            3,
745            2,
746            n_features,
747            Some(42),
748            gpuconfig.clone(),
749        )?;
750    }
751    let gpu_time = gpu_start.elapsed().as_secs_f64() / iterations as f64;
752
753    Ok((cpu_time, gpu_time))
754}