1use super::basic::{make_blobs, make_classification, make_regression};
4use super::config::GpuConfig;
5use crate::error::{DatasetsError, Result};
6use crate::gpu::{GpuContext, GpuDeviceInfo};
7use crate::utils::Dataset;
8use scirs2_core::ndarray::{Array1, Array2};
9use scirs2_core::random::prelude::*;
10use scirs2_core::random::rngs::StdRng;
11use scirs2_core::random::Distribution;
12use crate::gpu::GpuBackend as LocalGpuBackend;
14
15#[allow(dead_code)]
17#[allow(clippy::too_many_arguments)]
18pub fn make_classification_gpu(
19 n_samples: usize,
20 n_features: usize,
21 n_classes: usize,
22 n_clusters_per_class: usize,
23 n_informative: usize,
24 randomseed: Option<u64>,
25 gpuconfig: GpuConfig,
26) -> Result<Dataset> {
27 if gpuconfig.use_gpu && gpu_is_available() {
29 make_classification_gpu_impl(
30 n_samples,
31 n_features,
32 n_classes,
33 n_clusters_per_class,
34 n_informative,
35 randomseed,
36 gpuconfig,
37 )
38 } else {
39 make_classification(
41 n_samples,
42 n_features,
43 n_classes,
44 n_clusters_per_class,
45 n_informative,
46 randomseed,
47 )
48 }
49}
50
51#[allow(dead_code)]
53fn make_classification_gpu_impl(
54 n_samples: usize,
55 n_features: usize,
56 n_classes: usize,
57 n_clusters_per_class: usize,
58 n_informative: usize,
59 randomseed: Option<u64>,
60 gpuconfig: GpuConfig,
61) -> Result<Dataset> {
62 if n_samples == 0 || n_features == 0 || n_informative == 0 {
64 return Err(DatasetsError::InvalidFormat(
65 "n_samples, n_features, and n_informative must be > 0".to_string(),
66 ));
67 }
68
69 if n_features < n_informative {
70 return Err(DatasetsError::InvalidFormat(format!(
71 "n_features ({n_features}) must be >= n_informative ({n_informative})"
72 )));
73 }
74
75 if n_classes < 2 || n_clusters_per_class == 0 {
76 return Err(DatasetsError::InvalidFormat(
77 "n_classes must be >= 2 and n_clusters_per_class must be > 0".to_string(),
78 ));
79 }
80
81 let gpu_context = GpuContext::new(crate::gpu::GpuConfig {
83 backend: crate::gpu::GpuBackend::Cuda {
84 device_id: gpuconfig.device_id as u32,
85 },
86 memory: crate::gpu::GpuMemoryConfig::default(),
87 threads_per_block: 256,
88 enable_double_precision: !gpuconfig.use_single_precision,
89 use_fast_math: false,
90 random_seed: None,
91 })
92 .map_err(|e| DatasetsError::Other(format!("Failed to create GPU context: {e}")))?;
93
94 let chunk_size = std::cmp::min(gpuconfig.chunk_size, n_samples);
96 let num_chunks = n_samples.div_ceil(chunk_size);
97
98 let mut all_data = Vec::new();
99 let mut all_targets = Vec::new();
100
101 for chunk_idx in 0..num_chunks {
102 let start_idx = chunk_idx * chunk_size;
103 let end_idx = std::cmp::min(start_idx + chunk_size, n_samples);
104 let chunk_samples = end_idx - start_idx;
105
106 let (chunk_data, chunk_targets) = generate_classification_chunk_gpu(
108 &gpu_context,
109 chunk_samples,
110 n_features,
111 n_classes,
112 n_clusters_per_class,
113 n_informative,
114 randomseed.map(|s| s + chunk_idx as u64),
115 gpuconfig.use_single_precision,
116 )?;
117
118 all_data.extend(chunk_data);
119 all_targets.extend(chunk_targets);
120 }
121
122 let data = Array2::from_shape_vec((n_samples, n_features), all_data)
124 .map_err(|e| DatasetsError::Other(format!("Failed to create data array: {e}")))?;
125
126 let target = Array1::from_vec(all_targets);
127
128 let mut dataset = Dataset::new(data, Some(target));
130
131 let featurenames: Vec<String> = (0..n_features).map(|i| format!("feature_{i}")).collect();
133 let classnames: Vec<String> = (0..n_classes).map(|i| format!("class_{i}")).collect();
134
135 dataset = dataset
136 .with_featurenames(featurenames)
137 .with_targetnames(classnames)
138 .with_description(format!(
139 "GPU-accelerated synthetic classification dataset with {n_classes} _classes and {n_features} _features"
140 ));
141
142 Ok(dataset)
143}
144
145#[allow(dead_code)]
147#[allow(clippy::too_many_arguments)]
148fn generate_classification_chunk_gpu(
149 gpu_context: &GpuContext,
150 n_samples: usize,
151 n_features: usize,
152 n_classes: usize,
153 n_clusters_per_class: usize,
154 n_informative: usize,
155 randomseed: Option<u64>,
156 _use_single_precision: bool,
157) -> Result<(Vec<f64>, Vec<f64>)> {
158 let _seed = randomseed.unwrap_or(42);
162 let mut rng = StdRng::seed_from_u64(_seed);
163
164 let n_centroids = n_classes * n_clusters_per_class;
166 let mut centroids = vec![0.0; n_centroids * n_informative];
167
168 for i in 0..n_centroids {
169 for j in 0..n_informative {
170 centroids[i * n_informative + j] = 2.0 * rng.gen_range(-1.0f64..1.0f64);
171 }
172 }
173
174 let mut data = vec![0.0; n_samples * n_features];
176 let mut targets = vec![0.0; n_samples];
177
178 if *gpu_context.backend() != LocalGpuBackend::Cpu {
180 return generate_classification_gpu_optimized(
181 gpu_context,
182 ¢roids,
183 n_samples,
184 n_features,
185 n_classes,
186 n_clusters_per_class,
187 n_informative,
188 &mut rng,
189 );
190 }
191
192 let samples_per_class = n_samples / n_classes;
194 let remainder = n_samples % n_classes;
195
196 let mut sample_idx = 0;
197 for _class in 0..n_classes {
198 let n_samples_class = if _class < remainder {
199 samples_per_class + 1
200 } else {
201 samples_per_class
202 };
203
204 let samples_per_cluster = n_samples_class / n_clusters_per_class;
205 let cluster_remainder = n_samples_class % n_clusters_per_class;
206
207 for cluster in 0..n_clusters_per_class {
208 let n_samples_cluster = if cluster < cluster_remainder {
209 samples_per_cluster + 1
210 } else {
211 samples_per_cluster
212 };
213
214 let centroid_idx = _class * n_clusters_per_class + cluster;
215
216 for _ in 0..n_samples_cluster {
217 for j in 0..n_informative {
219 let centroid_val = centroids[centroid_idx * n_informative + j];
220 let noise = scirs2_core::random::Normal::new(0.0, 0.3)
221 .unwrap()
222 .sample(&mut rng);
223 data[sample_idx * n_features + j] = centroid_val + noise;
224 }
225
226 for j in n_informative..n_features {
228 data[sample_idx * n_features + j] = scirs2_core::random::Normal::new(0.0, 1.0)
229 .unwrap()
230 .sample(&mut rng);
231 }
232
233 targets[sample_idx] = _class as f64;
234 sample_idx += 1;
235 }
236 }
237 }
238
239 Ok((data, targets))
240}
241
242#[allow(clippy::too_many_arguments)]
244#[allow(dead_code)]
245fn generate_classification_gpu_optimized(
246 _gpu_context: &GpuContext,
247 centroids: &[f64],
248 n_samples: usize,
249 n_features: usize,
250 n_classes: usize,
251 n_clusters_per_class: usize,
252 n_informative: usize,
253 rng: &mut StdRng,
254) -> Result<(Vec<f64>, Vec<f64>)> {
255 use scirs2_core::random::Distribution;
260 let normal = scirs2_core::random::Normal::new(0.0, 1.0).unwrap();
261
262 let mut data = vec![0.0; n_samples * n_features];
263 let mut targets = vec![0.0; n_samples];
264
265 let samples_per_class = n_samples / n_classes;
267 let remainder = n_samples % n_classes;
268
269 let mut sample_idx = 0;
270
271 for _class in 0..n_classes {
272 let n_samples_class = if _class < remainder {
273 samples_per_class + 1
274 } else {
275 samples_per_class
276 };
277
278 let samples_per_cluster = n_samples_class / n_clusters_per_class;
280 let cluster_remainder = n_samples_class % n_clusters_per_class;
281
282 for cluster in 0..n_clusters_per_class {
283 let n_samples_cluster = if cluster < cluster_remainder {
284 samples_per_cluster + 1
285 } else {
286 samples_per_cluster
287 };
288
289 let centroid_idx = _class * n_clusters_per_class + cluster;
290
291 for _ in 0..n_samples_cluster {
292 for j in 0..n_informative {
294 let centroid_val = centroids[centroid_idx * n_informative + j];
295 data[sample_idx * n_features + j] = centroid_val + 0.3 * normal.sample(rng);
296 }
297
298 for j in n_informative..n_features {
300 data[sample_idx * n_features + j] = normal.sample(rng);
301 }
302
303 targets[sample_idx] = _class as f64;
304 sample_idx += 1;
305 }
306 }
307 }
308
309 Ok((data, targets))
312}
313
314#[allow(dead_code)]
316#[allow(clippy::too_many_arguments)]
317pub fn make_regression_gpu(
318 n_samples: usize,
319 n_features: usize,
320 n_informative: usize,
321 noise: f64,
322 randomseed: Option<u64>,
323 gpuconfig: GpuConfig,
324) -> Result<Dataset> {
325 if gpuconfig.use_gpu && gpu_is_available() {
327 make_regression_gpu_impl(
328 n_samples,
329 n_features,
330 n_informative,
331 noise,
332 randomseed,
333 gpuconfig,
334 )
335 } else {
336 make_regression(n_samples, n_features, n_informative, noise, randomseed)
338 }
339}
340
341#[allow(dead_code)]
343fn make_regression_gpu_impl(
344 n_samples: usize,
345 n_features: usize,
346 n_informative: usize,
347 noise: f64,
348 randomseed: Option<u64>,
349 gpuconfig: GpuConfig,
350) -> Result<Dataset> {
351 if n_samples == 0 || n_features == 0 || n_informative == 0 {
353 return Err(DatasetsError::InvalidFormat(
354 "n_samples, n_features, and n_informative must be > 0".to_string(),
355 ));
356 }
357
358 if n_features < n_informative {
359 return Err(DatasetsError::InvalidFormat(format!(
360 "n_features ({n_features}) must be >= n_informative ({n_informative})"
361 )));
362 }
363
364 let gpu_context = GpuContext::new(crate::gpu::GpuConfig {
366 backend: crate::gpu::GpuBackend::Cuda {
367 device_id: gpuconfig.device_id as u32,
368 },
369 memory: crate::gpu::GpuMemoryConfig::default(),
370 threads_per_block: 256,
371 enable_double_precision: !gpuconfig.use_single_precision,
372 use_fast_math: false,
373 random_seed: None,
374 })
375 .map_err(|e| DatasetsError::Other(format!("Failed to create GPU context: {e}")))?;
376
377 let _seed = randomseed.unwrap_or(42);
378 let mut rng = StdRng::seed_from_u64(_seed);
379
380 let mut coefficients = vec![0.0; n_informative];
382 for coeff in coefficients.iter_mut().take(n_informative) {
383 *coeff = rng.gen_range(-2.0f64..2.0f64);
384 }
385
386 let chunk_size = std::cmp::min(gpuconfig.chunk_size, n_samples);
388 let num_chunks = n_samples.div_ceil(chunk_size);
389
390 let mut all_data = Vec::new();
391 let mut all_targets = Vec::new();
392
393 for chunk_idx in 0..num_chunks {
394 let start_idx = chunk_idx * chunk_size;
395 let end_idx = std::cmp::min(start_idx + chunk_size, n_samples);
396 let chunk_samples = end_idx - start_idx;
397
398 let (chunk_data, chunk_targets) = generate_regression_chunk_gpu(
400 &gpu_context,
401 chunk_samples,
402 n_features,
403 n_informative,
404 &coefficients,
405 noise,
406 randomseed.map(|s| s + chunk_idx as u64),
407 )?;
408
409 all_data.extend(chunk_data);
410 all_targets.extend(chunk_targets);
411 }
412
413 let data = Array2::from_shape_vec((n_samples, n_features), all_data)
415 .map_err(|e| DatasetsError::Other(format!("Failed to create data array: {e}")))?;
416
417 let target = Array1::from_vec(all_targets);
418
419 let mut dataset = Dataset::new(data, Some(target));
421
422 let featurenames: Vec<String> = (0..n_features).map(|i| format!("feature_{i}")).collect();
424
425 dataset = dataset
426 .with_featurenames(featurenames)
427 .with_description(format!(
428 "GPU-accelerated synthetic regression dataset with {n_features} _features"
429 ));
430
431 Ok(dataset)
432}
433
434#[allow(dead_code)]
436fn generate_regression_chunk_gpu(
437 gpu_context: &GpuContext,
438 n_samples: usize,
439 n_features: usize,
440 n_informative: usize,
441 coefficients: &[f64],
442 noise: f64,
443 randomseed: Option<u64>,
444) -> Result<(Vec<f64>, Vec<f64>)> {
445 let _seed = randomseed.unwrap_or(42);
446 let mut rng = StdRng::seed_from_u64(_seed);
447
448 let mut data = vec![0.0; n_samples * n_features];
450 let normal = scirs2_core::random::Normal::new(0.0, 1.0).unwrap();
451
452 for i in 0..n_samples {
454 for j in 0..n_features {
455 data[i * n_features + j] = normal.sample(&mut rng);
456 }
457 }
458
459 let mut targets = vec![0.0; n_samples];
461 let noise_dist = scirs2_core::random::Normal::new(0.0, noise).unwrap();
462
463 if *gpu_context.backend() != LocalGpuBackend::Cpu {
465 return generate_regression_gpu_optimized(
466 gpu_context,
467 &data,
468 coefficients,
469 n_samples,
470 n_features,
471 n_informative,
472 noise,
473 &mut rng,
474 );
475 }
476
477 for i in 0..n_samples {
479 let mut target_val = 0.0;
480 for j in 0..n_informative {
481 target_val += data[i * n_features + j] * coefficients[j];
482 }
483
484 target_val += noise_dist.sample(&mut rng);
486 targets[i] = target_val;
487 }
488
489 Ok((data, targets))
490}
491
492#[allow(clippy::too_many_arguments)]
494#[allow(dead_code)]
495fn generate_regression_gpu_optimized(
496 _gpu_context: &GpuContext,
497 data: &[f64],
498 coefficients: &[f64],
499 n_samples: usize,
500 n_features: usize,
501 n_informative: usize,
502 noise: f64,
503 rng: &mut StdRng,
504) -> Result<(Vec<f64>, Vec<f64>)> {
505 use scirs2_core::random::Distribution;
510 let normal = scirs2_core::random::Normal::new(0.0, noise).unwrap();
511
512 let mut targets = vec![0.0; n_samples];
513
514 for i in 0..n_samples {
516 let mut target = 0.0;
517 for j in 0..n_informative {
518 target += data[i * n_features + j] * coefficients[j];
519 }
520
521 target += normal.sample(rng);
523 targets[i] = target;
524 }
525
526 Ok((data.to_vec(), targets))
527}
528
529#[allow(dead_code)]
531pub fn make_blobs_gpu(
532 n_samples: usize,
533 n_features: usize,
534 n_centers: usize,
535 cluster_std: f64,
536 randomseed: Option<u64>,
537 gpuconfig: GpuConfig,
538) -> Result<Dataset> {
539 if gpuconfig.use_gpu && gpu_is_available() {
541 make_blobs_gpu_impl(
542 n_samples,
543 n_features,
544 n_centers,
545 cluster_std,
546 randomseed,
547 gpuconfig,
548 )
549 } else {
550 make_blobs(n_samples, n_features, n_centers, cluster_std, randomseed)
552 }
553}
554
555#[allow(dead_code)]
557fn make_blobs_gpu_impl(
558 n_samples: usize,
559 n_features: usize,
560 n_centers: usize,
561 cluster_std: f64,
562 randomseed: Option<u64>,
563 gpuconfig: GpuConfig,
564) -> Result<Dataset> {
565 if n_samples == 0 || n_features == 0 || n_centers == 0 {
567 return Err(DatasetsError::InvalidFormat(
568 "n_samples, n_features, and n_centers must be > 0".to_string(),
569 ));
570 }
571
572 if cluster_std <= 0.0 {
573 return Err(DatasetsError::InvalidFormat(
574 "cluster_std must be > 0".to_string(),
575 ));
576 }
577
578 let gpu_context = GpuContext::new(crate::gpu::GpuConfig {
580 backend: crate::gpu::GpuBackend::Cuda {
581 device_id: gpuconfig.device_id as u32,
582 },
583 memory: crate::gpu::GpuMemoryConfig::default(),
584 threads_per_block: 256,
585 enable_double_precision: !gpuconfig.use_single_precision,
586 use_fast_math: false,
587 random_seed: None,
588 })
589 .map_err(|e| DatasetsError::Other(format!("Failed to create GPU context: {e}")))?;
590
591 let _seed = randomseed.unwrap_or(42);
592 let mut rng = StdRng::seed_from_u64(_seed);
593
594 let mut centers = Array2::zeros((n_centers, n_features));
596 let center_dist = scirs2_core::random::Normal::new(0.0, 10.0).unwrap();
597
598 for i in 0..n_centers {
599 for j in 0..n_features {
600 centers[[i, j]] = center_dist.sample(&mut rng);
601 }
602 }
603
604 let samples_per_center = n_samples / n_centers;
606 let remainder = n_samples % n_centers;
607
608 let mut data = Array2::zeros((n_samples, n_features));
609 let mut target = Array1::zeros(n_samples);
610
611 let mut sample_idx = 0;
612 let noise_dist = scirs2_core::random::Normal::new(0.0, cluster_std).unwrap();
613
614 for center_idx in 0..n_centers {
615 let n_samples_center = if center_idx < remainder {
616 samples_per_center + 1
617 } else {
618 samples_per_center
619 };
620
621 if *gpu_context.backend() != LocalGpuBackend::Cpu {
623 let gpu_generated = generate_blobs_center_gpu(
625 &gpu_context,
626 ¢ers,
627 center_idx,
628 n_samples_center,
629 n_features,
630 cluster_std,
631 &mut rng,
632 )?;
633
634 for (local_idx, sample) in gpu_generated.iter().enumerate() {
636 for j in 0..n_features {
637 data[[sample_idx + local_idx, j]] = sample[j];
638 }
639 target[sample_idx + local_idx] = center_idx as f64;
640 }
641 sample_idx += n_samples_center;
642 } else {
643 for _ in 0..n_samples_center {
645 for j in 0..n_features {
646 data[[sample_idx, j]] = centers[[center_idx, j]] + noise_dist.sample(&mut rng);
647 }
648 target[sample_idx] = center_idx as f64;
649 sample_idx += 1;
650 }
651 }
652 }
653
654 let mut dataset = Dataset::new(data, Some(target));
656
657 let featurenames: Vec<String> = (0..n_features).map(|i| format!("feature_{i}")).collect();
659 let centernames: Vec<String> = (0..n_centers).map(|i| format!("center_{i}")).collect();
660
661 dataset = dataset
662 .with_featurenames(featurenames)
663 .with_targetnames(centernames)
664 .with_description(format!(
665 "GPU-accelerated synthetic blob dataset with {n_centers} _centers and {n_features} _features"
666 ));
667
668 Ok(dataset)
669}
670
671#[allow(dead_code)]
673fn generate_blobs_center_gpu(
674 _gpu_context: &GpuContext,
675 centers: &Array2<f64>,
676 center_idx: usize,
677 n_samples_center: usize,
678 n_features: usize,
679 cluster_std: f64,
680 rng: &mut StdRng,
681) -> Result<Vec<Vec<f64>>> {
682 let _center_coords: Vec<f64> = (0..n_features).map(|j| centers[[center_idx, j]]).collect();
687
688 use scirs2_core::random::Distribution;
690 let normal = scirs2_core::random::Normal::new(0.0, cluster_std).unwrap();
691
692 let mut result = Vec::with_capacity(n_samples_center);
693
694 for _ in 0..n_samples_center {
695 let mut sample = Vec::with_capacity(n_features);
696 for j in 0..n_features {
697 let center_val = centers[[center_idx, j]];
698 let noise = normal.sample(rng);
699 sample.push(center_val + noise);
700 }
701 result.push(sample);
702 }
703
704 Ok(result)
705}
706
707#[allow(dead_code)]
709pub fn gpu_is_available() -> bool {
710 GpuContext::new(crate::gpu::GpuConfig::default()).is_ok()
712}
713
714#[allow(dead_code)]
716pub fn get_gpu_info() -> Result<Vec<GpuDeviceInfo>> {
717 crate::gpu::list_gpu_devices()
718 .map_err(|e| DatasetsError::Other(format!("Failed to get GPU info: {e}")))
719}
720
721#[allow(dead_code)]
723pub fn benchmark_gpu_vs_cpu(
724 n_samples: usize,
725 n_features: usize,
726 iterations: usize,
727) -> Result<(f64, f64)> {
728 use std::time::Instant;
729
730 let cpu_start = Instant::now();
732 for _ in 0..iterations {
733 let _result = make_classification(n_samples, n_features, 3, 2, n_features, Some(42))?;
734 }
735 let cpu_time = cpu_start.elapsed().as_secs_f64() / iterations as f64;
736
737 let gpuconfig = GpuConfig::default();
739 let gpu_start = Instant::now();
740 for _ in 0..iterations {
741 let _result = make_classification_gpu(
742 n_samples,
743 n_features,
744 3,
745 2,
746 n_features,
747 Some(42),
748 gpuconfig.clone(),
749 )?;
750 }
751 let gpu_time = gpu_start.elapsed().as_secs_f64() / iterations as f64;
752
753 Ok((cpu_time, gpu_time))
754}