1use crate::error::{SpecialError, SpecialResult};
7use scirs2_core::gpu::{GpuBackend, GpuContext};
8use std::collections::HashMap;
9use std::sync::{Arc, Mutex, RwLock};
10use std::time::Duration;
11
/// Properties of a single GPU device discovered during pool initialization.
#[derive(Debug, Clone)]
pub struct GpuDeviceInfo {
    /// Device index as reported by the probe (currently always 0).
    pub device_id: usize,
    /// Human-readable device name.
    pub device_name: String,
    /// Total device memory in bytes (estimated by the probes, not queried).
    pub memorysize: u64,
    /// Number of compute units / SMs (estimated by the probes).
    pub compute_units: u32,
    /// Maximum workgroup / thread-block size (estimated by the probes).
    pub max_workgroupsize: u32,
    /// Backend this device belongs to (WebGPU, OpenCL, CUDA, ...).
    pub backend_type: GpuBackend,
    /// Whether the device can currently be used.
    pub is_available: bool,
}
23
/// Running performance counters for one GPU backend.
///
/// Updated by `GpuContextPool::record_operation` and reported by the
/// report/trend helpers in this module.
#[derive(Debug, Clone, Default)]
pub struct GpuPerformanceStats {
    /// Total operations attempted (successful + failed).
    pub total_operations: u64,
    /// Operations that completed successfully.
    pub successful_operations: u64,
    /// Operations that failed.
    pub failed_operations: u64,
    /// Cumulative wall time of successful operations.
    pub total_execution_time: Duration,
    /// `total_execution_time` divided by `successful_operations`.
    pub average_execution_time: Duration,
    /// Number of host<->device transfers recorded.
    pub memory_transfers: u64,
    /// Total bytes moved across all recorded operations.
    pub total_data_transferred: u64,
    /// High-water mark of device memory usage, in bytes.
    pub peak_memory_usage: u64,
    /// Kernel-cache hit rate, in [0, 1].
    pub cache_hit_rate: f64,
    /// Message of the most recent failure, if any.
    pub last_error_message: Option<String>,
    /// Observed operation throughput (operations per second).
    pub operations_per_second: f64,
}
39
/// Tunable policy knobs for production GPU usage.
#[derive(Debug, Clone)]
pub struct GpuProductionConfig {
    /// Minimum number of array elements before GPU execution is considered.
    pub min_gpu_arraysize: usize,
    /// Upper bound on GPU memory usage, as a percentage of device memory.
    pub max_memory_usage_percent: f32,
    /// Allow automatic CPU/GPU switching based on runtime health.
    pub enable_adaptive_switching: bool,
    /// Number of warm-up iterations before timing measurements.
    pub warmup_iterations: u32,
    /// How many times a failed GPU operation may be retried.
    pub max_retry_attempts: u32,
    /// Enable detailed performance profiling.
    pub enable_profiling: bool,
    /// Backend to prefer when several are available.
    pub preferred_backend: GpuBackend,
}
58
59impl Default for GpuProductionConfig {
60 fn default() -> Self {
61 Self {
62 min_gpu_arraysize: 1000,
63 max_memory_usage_percent: 80.0,
64 enable_adaptive_switching: true,
65 warmup_iterations: 3,
66 max_retry_attempts: 3,
67 enable_profiling: false,
68 preferred_backend: GpuBackend::Cpu,
69 }
70 }
71}
72
/// Thread-safe pool of GPU contexts with per-backend performance tracking
/// and a CPU-fallback policy.
pub struct GpuContextPool {
    /// One shared context per successfully initialized backend.
    contexts: RwLock<HashMap<GpuBackend, Arc<GpuContext>>>,
    /// Device properties captured during discovery.
    device_info: RwLock<HashMap<GpuBackend, GpuDeviceInfo>>,
    /// Running counters per backend.
    performance_stats: RwLock<HashMap<GpuBackend, GpuPerformanceStats>>,
    /// Failure count after which fallback kicks in.
    fallback_threshold: Mutex<usize>,
    /// Whether automatic CPU fallback is enabled.
    auto_fallback_enabled: Mutex<bool>,
    /// Active production configuration.
    production_config: RwLock<GpuProductionConfig>,
    /// Tracked memory usage per backend, in bytes.
    memory_usage_tracker: RwLock<HashMap<GpuBackend, u64>>,
}
83
impl Default for GpuContextPool {
    /// Equivalent to [`GpuContextPool::new`].
    fn default() -> Self {
        Self::new()
    }
}
89
90impl GpuContextPool {
91 pub fn new() -> Self {
93 Self {
94 contexts: RwLock::new(HashMap::new()),
95 device_info: RwLock::new(HashMap::new()),
96 performance_stats: RwLock::new(HashMap::new()),
97 fallback_threshold: Mutex::new(5), auto_fallback_enabled: Mutex::new(true),
99 production_config: RwLock::new(GpuProductionConfig::default()),
100 memory_usage_tracker: RwLock::new(HashMap::new()),
101 }
102 }
103
104 pub fn with_config(config: GpuProductionConfig) -> Self {
106 Self {
107 contexts: RwLock::new(HashMap::new()),
108 device_info: RwLock::new(HashMap::new()),
109 performance_stats: RwLock::new(HashMap::new()),
110 fallback_threshold: Mutex::new(config.max_retry_attempts as usize),
111 auto_fallback_enabled: Mutex::new(config.enable_adaptive_switching),
112 production_config: RwLock::new(config),
113 memory_usage_tracker: RwLock::new(HashMap::new()),
114 }
115 }
116
117 pub fn update_config(&self, config: GpuProductionConfig) {
119 *self.production_config.write().expect("Operation failed") = config;
120 }
121
122 pub fn get_config(&self) -> GpuProductionConfig {
124 self.production_config
125 .read()
126 .expect("Operation failed")
127 .clone()
128 }
129
130 pub fn initialize(&self) -> SpecialResult<()> {
132 self.discover_devices()?;
133 self.create_contexts()?;
134 Ok(())
135 }
136
137 fn discover_devices(&self) -> SpecialResult<()> {
139 let mut device_info = self.device_info.write().expect("Operation failed");
140
141 if let Ok(info) = self.probe_webgpu_device() {
143 device_info.insert(GpuBackend::Wgpu, info);
144 }
145
146 if let Ok(info) = self.probe_opencl_device() {
148 device_info.insert(GpuBackend::OpenCL, info);
149 }
150
151 if let Ok(info) = self.probe_cuda_device() {
153 device_info.insert(GpuBackend::Cuda, info);
154 }
155
156 if device_info.is_empty() {
157 #[cfg(feature = "gpu")]
158 log::warn!("No GPU devices discovered");
159 } else {
160 #[cfg(feature = "gpu")]
161 log::info!("Discovered {} GPU device(s)", device_info.len());
162 }
163
164 Ok(())
165 }
166
167 fn probe_webgpu_device(&self) -> SpecialResult<GpuDeviceInfo> {
169 match GpuContext::new(GpuBackend::Wgpu) {
172 Ok(_context) => {
173 let info = GpuDeviceInfo {
174 device_id: 0,
175 device_name: "WebGPU Device".to_string(),
176 memorysize: 1024 * 1024 * 1024, compute_units: 32, max_workgroupsize: 256,
179 backend_type: GpuBackend::Wgpu,
180 is_available: true,
181 };
182
183 #[cfg(feature = "gpu")]
184 log::info!("WebGPU device available: {}", info.device_name);
185
186 Ok(info)
187 }
188 Err(e) => {
189 #[cfg(feature = "gpu")]
190 log::debug!("WebGPU not available: {}", e);
191 Err(SpecialError::GpuNotAvailable(
192 "WebGPU not available".to_string(),
193 ))
194 }
195 }
196 }
197
198 fn probe_opencl_device(&self) -> SpecialResult<GpuDeviceInfo> {
200 #[cfg(feature = "gpu")]
203 log::debug!("Probing OpenCL devices...");
204
205 match GpuContext::new(GpuBackend::OpenCL) {
207 Ok(context) => {
208 let info = self
210 .query_opencl_device_info(&std::sync::Arc::new(context))
211 .unwrap_or_else(|_| {
212 GpuDeviceInfo {
214 device_id: 0,
215 device_name: "OpenCL Device".to_string(),
216 memorysize: 2 * 1024 * 1024 * 1024, compute_units: 16, max_workgroupsize: 256,
219 backend_type: GpuBackend::OpenCL,
220 is_available: true,
221 }
222 });
223
224 #[cfg(feature = "gpu")]
225 log::info!(
226 "OpenCL device available: {} with {} compute units",
227 info.device_name,
228 info.compute_units
229 );
230
231 Ok(info)
232 }
233 Err(e) => {
234 #[cfg(feature = "gpu")]
235 log::debug!("OpenCL not available: {}", e);
236 Err(SpecialError::GpuNotAvailable(format!(
237 "OpenCL not available: {}",
238 e
239 )))
240 }
241 }
242 }
243
244 fn probe_cuda_device(&self) -> SpecialResult<GpuDeviceInfo> {
246 #[cfg(feature = "gpu")]
249 log::debug!("Probing CUDA devices...");
250
251 match GpuContext::new(GpuBackend::Cuda) {
253 Ok(context) => {
254 let info = self
256 .query_cuda_device_info(&std::sync::Arc::new(context))
257 .unwrap_or_else(|_| {
258 GpuDeviceInfo {
260 device_id: 0,
261 device_name: "NVIDIA CUDA Device".to_string(),
262 memorysize: 4 * 1024 * 1024 * 1024, compute_units: 64, max_workgroupsize: 1024, backend_type: GpuBackend::Cuda,
266 is_available: true,
267 }
268 });
269
270 #[cfg(feature = "gpu")]
271 log::info!(
272 "CUDA device available: {} with {} SMs",
273 info.device_name,
274 info.compute_units
275 );
276
277 Ok(info)
278 }
279 Err(e) => {
280 #[cfg(feature = "gpu")]
281 log::debug!("CUDA not available: {}", e);
282 Err(SpecialError::GpuNotAvailable(format!(
283 "CUDA not available: {}",
284 e
285 )))
286 }
287 }
288 }
289
290 fn create_contexts(&self) -> SpecialResult<()> {
292 let device_info = self.device_info.read().expect("Operation failed");
293 let mut contexts = self.contexts.write().expect("Operation failed");
294 let mut stats = self.performance_stats.write().expect("Operation failed");
295
296 for (&backend_type, info) in device_info.iter() {
297 if info.is_available {
298 match GpuContext::new(backend_type) {
299 Ok(context) => {
300 contexts.insert(backend_type, std::sync::Arc::new(context));
301 stats.insert(backend_type, GpuPerformanceStats::default());
302
303 #[cfg(feature = "gpu")]
304 log::info!("Created GPU context for {:?}", backend_type);
305 }
306 Err(e) => {
307 #[cfg(feature = "gpu")]
308 log::warn!("Failed to create context for {:?}: {}", backend_type, e);
309 }
310 }
311 }
312 }
313
314 Ok(())
315 }
316
317 pub fn get_best_context(&self) -> SpecialResult<Arc<GpuContext>> {
319 let contexts = self.contexts.read().expect("Operation failed");
320 let stats = self.performance_stats.read().expect("Operation failed");
321
322 let preferred_order = [GpuBackend::Cuda, GpuBackend::Wgpu, GpuBackend::OpenCL];
324
325 for &backend_type in &preferred_order {
326 if let Some(context) = contexts.get(&backend_type) {
327 if let Some(stat) = stats.get(&backend_type) {
328 let success_rate = if stat.total_operations > 0 {
330 stat.successful_operations as f64 / stat.total_operations as f64
331 } else {
332 1.0 };
334
335 if success_rate > 0.8 {
336 #[cfg(feature = "gpu")]
337 log::debug!(
338 "Using {:?} context (success rate: {:.1}%)",
339 backend_type,
340 success_rate * 100.0
341 );
342 return Ok(Arc::clone(context));
343 }
344 }
345 }
346 }
347
348 Err(SpecialError::GpuNotAvailable(
349 "No healthy GPU contexts available".to_string(),
350 ))
351 }
352
353 pub fn record_operation(
355 &self,
356 backend_type: GpuBackend,
357 execution_time: Duration,
358 success: bool,
359 datasize: usize,
360 ) {
361 let mut stats = self.performance_stats.write().expect("Operation failed");
362 if let Some(stat) = stats.get_mut(&backend_type) {
363 stat.total_operations += 1;
364
365 if success {
366 stat.successful_operations += 1;
367 stat.total_execution_time += execution_time;
368 stat.average_execution_time =
369 stat.total_execution_time / stat.successful_operations as u32;
370 stat.total_data_transferred += datasize as u64;
371 } else {
372 stat.failed_operations += 1;
373 }
374
375 stat.memory_transfers += 1;
376 }
377 }
378
379 pub fn get_performance_stats(&self, backendtype: GpuBackend) -> Option<GpuPerformanceStats> {
381 let stats = self.performance_stats.read().expect("Operation failed");
382 stats.get(&backendtype).cloned()
383 }
384
385 pub fn get_device_info(&self) -> HashMap<GpuBackend, GpuDeviceInfo> {
387 self.device_info.read().expect("Operation failed").clone()
388 }
389
390 pub fn should_use_gpu(&self, arraysize: usize, data_typesize: usize) -> bool {
392 let min_elements = match data_typesize {
394 4 => 512, 8 => 256, _ => 1024, };
398
399 if arraysize < min_elements {
400 return false;
401 }
402
403 let auto_fallback = *self.auto_fallback_enabled.lock().expect("Operation failed");
405 if !auto_fallback {
406 return false;
407 }
408
409 let contexts = self.contexts.read().expect("Operation failed");
411 !contexts.is_empty()
412 }
413
414 pub fn set_auto_fallback(&self, enabled: bool) {
416 *self.auto_fallback_enabled.lock().expect("Operation failed") = enabled;
417 }
418
419 pub fn set_fallback_threshold(&self, threshold: usize) {
421 *self.fallback_threshold.lock().expect("Operation failed") = threshold;
422 }
423
424 fn query_opencl_device_info(&self, context: &Arc<GpuContext>) -> SpecialResult<GpuDeviceInfo> {
426 #[cfg(feature = "gpu")]
427 log::debug!("Querying OpenCL device properties...");
428
429 let estimated_memory = 2 * 1024 * 1024 * 1024; let estimated_compute_units = 16; Ok(GpuDeviceInfo {
433 device_id: 0,
434 device_name: format!("OpenCL GPU Device (Unknown)"),
435 memorysize: estimated_memory,
436 compute_units: estimated_compute_units,
437 max_workgroupsize: 256,
438 backend_type: GpuBackend::OpenCL,
439 is_available: true,
440 })
441 }
442
443 fn query_cuda_device_info(&self, context: &Arc<GpuContext>) -> SpecialResult<GpuDeviceInfo> {
445 #[cfg(feature = "gpu")]
446 log::debug!("Querying CUDA device properties...");
447
448 let estimated_memory = 4 * 1024 * 1024 * 1024; let estimated_compute_units = 64; Ok(GpuDeviceInfo {
452 device_id: 0,
453 device_name: format!("NVIDIA CUDA Device (Unknown)"),
454 memorysize: estimated_memory,
455 compute_units: estimated_compute_units,
456 max_workgroupsize: 1024,
457 backend_type: GpuBackend::Cuda,
458 is_available: true,
459 })
460 }
461
    // --- Placeholder capability probes -----------------------------------
    // The following helpers return fixed estimates; none of them inspect
    // real hardware yet.

    /// Estimated OpenCL device memory in bytes (2 GiB).
    fn estimate_gpu_memory_opencl(&self) -> u64 {
        2 * 1024 * 1024 * 1024
    }
    /// Estimated CUDA device memory in bytes (4 GiB).
    fn estimate_gpu_memory_cuda(&self) -> u64 {
        4 * 1024 * 1024 * 1024
    }
    /// Estimated OpenCL compute-unit count.
    /// NOTE(review): returns 32 while the OpenCL probe fallback assumes 16 —
    /// confirm which value is intended.
    fn estimate_compute_units_opencl(&self) -> u32 {
        32
    }
    /// Estimated CUDA SM count.
    fn estimate_compute_units_cuda(&self) -> u32 {
        64
    }
    /// GPU vendor detection placeholder.
    fn detect_gpu_vendor(&self) -> String {
        "Unknown Vendor".to_string()
    }
    /// NVIDIA architecture detection placeholder.
    fn detect_nvidia_architecture(&self) -> String {
        "Unknown Architecture".to_string()
    }
    /// Assumed total system memory in bytes (8 GiB).
    fn get_system_memorysize(&self) -> u64 {
        8 * 1024 * 1024 * 1024
    }
    /// Integrated-GPU heuristic placeholder (always reports discrete).
    fn is_likely_integrated_gpu(&self) -> bool {
        false
    }
487
488 pub fn get_performance_trends(&self) -> HashMap<GpuBackend, String> {
490 let stats = self.performance_stats.read().expect("Operation failed");
491 let mut trends = HashMap::new();
492
493 for (&backend_type, stat) in stats.iter() {
494 let trend_analysis = if stat.total_operations > 10 {
495 let success_rate = stat.successful_operations as f64 / stat.total_operations as f64;
496 let avg_throughput = if stat.average_execution_time.as_millis() > 0 {
497 1000.0 / stat.average_execution_time.as_millis() as f64
498 } else {
499 0.0
500 };
501
502 format!(
503 "Success: {:.1}%, Throughput: {:.1} ops/sec, Data: {} MB",
504 success_rate * 100.0,
505 avg_throughput,
506 stat.total_data_transferred / 1024 / 1024
507 )
508 } else {
509 "Insufficient data for trend analysis".to_string()
510 };
511 trends.insert(backend_type, trend_analysis);
512 }
513 trends
514 }
515
516 pub fn reset_performance_stats(&self) {
518 let mut stats = self.performance_stats.write().expect("Operation failed");
519 for stat in stats.values_mut() {
520 *stat = GpuPerformanceStats::default();
521 }
522 #[cfg(feature = "gpu")]
523 log::info!("GPU performance statistics reset");
524 }
525
526 pub fn get_performance_stats_all(&self) -> HashMap<GpuBackend, GpuPerformanceStats> {
528 self.performance_stats
529 .read()
530 .expect("Operation failed")
531 .clone()
532 }
533
    /// Builds a multi-line, human-readable report of every discovered GPU
    /// device together with its recorded performance statistics.
    pub fn get_system_report(&self) -> String {
        let device_info = self.device_info.read().expect("Operation failed");
        let stats = self.performance_stats.read().expect("Operation failed");

        let mut report = String::new();
        report.push_str("=== GPU System Report ===\n\n");

        if device_info.is_empty() {
            report.push_str("No GPU devices available.\n");
        } else {
            report.push_str(&format!("Found {} GPU device(s):\n\n", device_info.len()));

            for (backend_type, info) in device_info.iter() {
                // Static device properties.
                report.push_str(&format!("Backend: {:?}\n", backend_type));
                report.push_str(&format!(" Device: {}\n", info.device_name));
                report.push_str(&format!(" Memory: {} MB\n", info.memorysize / 1024 / 1024));
                report.push_str(&format!(" Compute Units: {}\n", info.compute_units));
                report.push_str(&format!(
                    " Max Workgroup Size: {}\n",
                    info.max_workgroupsize
                ));
                report.push_str(&format!(" Available: {}\n", info.is_available));

                // Runtime statistics, when any operations were recorded.
                if let Some(stat) = stats.get(backend_type) {
                    if stat.total_operations > 0 {
                        let success_rate =
                            stat.successful_operations as f64 / stat.total_operations as f64;
                        report.push_str(&format!(" Success Rate: {:.1}%\n", success_rate * 100.0));
                        report.push_str(&format!(
                            " Avg Execution Time: {:?}\n",
                            stat.average_execution_time
                        ));
                        report.push_str(&format!(
                            " Total Data Transferred: {} MB\n",
                            stat.total_data_transferred / 1024 / 1024
                        ));
                    } else {
                        report.push_str(" No operations recorded\n");
                    }
                }
                report.push('\n');
            }
        }

        report
    }
581}
582
/// Process-wide, lazily-initialized GPU context pool (see `get_gpu_pool`).
static GPU_POOL: std::sync::OnceLock<GpuContextPool> = std::sync::OnceLock::new();
585
586#[allow(dead_code)]
588pub fn get_gpu_pool() -> &'static GpuContextPool {
589 GPU_POOL.get_or_init(|| {
590 let pool = GpuContextPool::new();
591 if let Err(e) = pool.initialize() {
592 #[cfg(feature = "gpu")]
593 log::warn!("Failed to initialize GPU pool: {}", e);
594 }
595 pool
596 })
597}
598
#[allow(dead_code)]
/// Forces (re-)initialization of the global GPU pool and returns the result.
///
/// NOTE(review): `get_gpu_pool()` already initializes the pool on first use,
/// so this runs discovery/context creation a second time — confirm that
/// `GpuContextPool::initialize` is intended to be idempotent.
pub fn initialize_gpu_system() -> SpecialResult<()> {
    let pool = get_gpu_pool();
    pool.initialize()
}
605
#[allow(dead_code)]
/// Convenience wrapper: returns the healthiest GPU context from the global
/// pool (see [`GpuContextPool::get_best_context`]).
pub fn get_best_gpu_context() -> SpecialResult<Arc<GpuContext>> {
    get_gpu_pool().get_best_context()
}
611
612#[allow(dead_code)]
614pub fn should_use_gpu_computation(_arraysize: usize, elementsize: usize) -> bool {
615 get_gpu_pool().should_use_gpu(_arraysize, elementsize)
616}
617
#[allow(dead_code)]
/// Convenience wrapper: records one operation's outcome on the global pool
/// (see [`GpuContextPool::record_operation`]).
pub fn record_gpu_performance(
    backend_type: GpuBackend,
    execution_time: Duration,
    success: bool,
    datasize: usize,
) {
    get_gpu_pool().record_operation(backend_type, execution_time, success, datasize);
}
628
#[allow(dead_code)]
/// Checks GPU availability, performance history and configuration, and
/// returns a human-readable production-readiness report.
///
/// Always returns `Ok`; "no GPU" is reported as a CPU-fallback
/// recommendation rather than an error.
pub fn validate_gpu_production_readiness() -> SpecialResult<String> {
    let pool = get_gpu_pool();
    let mut validation_report = String::new();

    // Device availability section.
    let device_info = pool.get_device_info();
    if device_info.is_empty() {
        validation_report.push_str("⚠️ No GPU devices detected\n");
        validation_report.push_str(" Recommendation: GPU features will use CPU fallback\n\n");
    } else {
        validation_report.push_str(&format!(
            "✅ {} GPU device(s) available\n",
            device_info.len()
        ));

        for (backend, info) in device_info.iter() {
            let memory_gb = info.memorysize as f64 / (1024.0 * 1024.0 * 1024.0);
            validation_report.push_str(&format!(
                " {:?}: {:.1} GB memory, {} compute units\n",
                backend, memory_gb, info.compute_units
            ));

            // Flag devices that may struggle with large arrays.
            if memory_gb < 2.0 {
                validation_report
                    .push_str(" ⚠️ Low GPU memory may limit large array processing\n");
            }
        }
        validation_report.push('\n');
    }

    // Performance-history section (present only once operations were recorded).
    let performance_trends = pool.get_performance_trends();
    if !performance_trends.is_empty() {
        validation_report.push_str("📊 Performance History:\n");
        for (backend, trend) in performance_trends {
            validation_report.push_str(&format!(" {:?}: {}\n", backend, trend));
        }
        validation_report.push('\n');
    }

    // Active configuration section.
    let config = pool.get_config();
    validation_report.push_str("⚙️ Configuration:\n");
    validation_report.push_str(&format!(
        " Min array size for GPU: {}\n",
        config.min_gpu_arraysize
    ));
    validation_report.push_str(&format!(
        " Max memory usage: {:.0}%\n",
        config.max_memory_usage_percent
    ));
    validation_report.push_str(&format!(
        " Adaptive switching: {}\n",
        config.enable_adaptive_switching
    ));
    validation_report.push_str(&format!(
        " Preferred backend: {:?}\n",
        config.preferred_backend
    ));

    // Actionable recommendations.
    validation_report.push_str("\n🎯 Recommendations:\n");
    if device_info.is_empty() {
        validation_report.push_str(" • Install GPU drivers for acceleration\n");
        validation_report.push_str(" • Enable GPU features in scirs2-core\n");
    } else {
        validation_report.push_str(" • GPU infrastructure ready for production use\n");
        validation_report.push_str(" • Monitor performance with get_system_report()\n");
        validation_report.push_str(" • Adjust min_gpu_arraysize based on workload\n");
    }

    Ok(validation_report)
}
705
#[allow(dead_code)]
/// Turns on profiling in the global pool's configuration.
///
/// `_enablealerts` only affects the log message; no alerting mechanism is
/// wired up here.
pub fn enable_gpu_monitoring(_enablealerts: bool) -> SpecialResult<()> {
    let pool = get_gpu_pool();
    // Read-modify-write of the shared configuration.
    let mut config = pool.get_config();
    config.enable_profiling = true;
    pool.update_config(config);

    #[cfg(feature = "gpu")]
    {
        if _enablealerts {
            log::info!("GPU performance monitoring enabled with alerts");
        } else {
            log::info!("GPU performance monitoring enabled without alerts");
        }
    }

    Ok(())
}
725
#[allow(dead_code)]
/// Builds a per-backend utilization report (memory high-water mark, success
/// rate, throughput, cache hit rate and last error) for the global pool.
pub fn get_gpu_resource_utilization() -> String {
    let pool = get_gpu_pool();
    let device_info = pool.get_device_info();
    let stats = pool.get_performance_stats_all();

    let mut report = String::new();
    report.push_str("=== GPU Resource Utilization ===\n");

    for (backend, info) in device_info.iter() {
        if let Some(stat) = stats.get(backend) {
            // Peak usage relative to total (estimated) device memory.
            let memory_usage = (stat.peak_memory_usage as f64 / info.memorysize as f64) * 100.0;
            let efficiency = if stat.total_operations > 0 {
                (stat.successful_operations as f64 / stat.total_operations as f64) * 100.0
            } else {
                0.0
            };

            report.push_str(&format!("\n{:?}:\n", backend));
            report.push_str(&format!(" Peak Memory Usage: {:.1}%\n", memory_usage));
            report.push_str(&format!(" Success Rate: {:.1}%\n", efficiency));
            report.push_str(&format!(
                " Operations/sec: {:.1}\n",
                stat.operations_per_second
            ));
            report.push_str(&format!(
                " Cache Hit Rate: {:.1}%\n",
                stat.cache_hit_rate * 100.0
            ));

            if let Some(ref error) = stat.last_error_message {
                report.push_str(&format!(" Last Error: {}\n", error));
            }
        }
    }

    report
}
765
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh pool has no devices until `initialize` is called.
    #[test]
    fn test_gpu_pool_creation() {
        let pool = GpuContextPool::new();
        assert!(pool.get_device_info().is_empty());
    }

    #[test]
    fn test_should_use_gpu_logic() {
        let pool = GpuContextPool::new();

        // Small arrays must always stay on the CPU.
        assert!(!pool.should_use_gpu(100, 4));

        // Larger arrays: the answer depends on whether any GPU context
        // exists on the test machine, so only exercise the code path
        // without asserting a specific outcome.
        let use_large_f32 = pool.should_use_gpu(1000, 4);
        let use_large_f64 = pool.should_use_gpu(1000, 8);

        let _ = (use_large_f32, use_large_f64);
    }

    #[test]
    fn test_performance_stats() {
        let pool = GpuContextPool::new();
        let backend = GpuBackend::Wgpu;

        // No stats exist before any context has been created.
        assert!(pool.get_performance_stats(backend).is_none());

        // Initialization may legitimately fail on machines without GPUs.
        let _ = pool.initialize();
    }
}
805}