// scirs2_special/gpu_context_manager.rs
//! Advanced GPU context management for special functions
//!
//! This module provides robust GPU context management with automatic
//! fallback, resource pooling, and performance monitoring.

use crate::error::{SpecialError, SpecialResult};
use scirs2_core::gpu::{GpuBackend, GpuContext};
use std::collections::HashMap;
use std::sync::{Arc, Mutex, RwLock};
use std::time::Duration;

/// GPU device information and capabilities
///
/// Capability values are currently fixed estimates supplied by the probe
/// functions, not values queried from the driver — see `probe_*_device`.
#[derive(Debug, Clone)]
pub struct GpuDeviceInfo {
    /// Device index as reported by the backend (always 0 in current probes)
    pub device_id: usize,
    /// Human-readable device name
    pub device_name: String,
    /// Total device memory in bytes (estimated)
    pub memorysize: u64,
    /// Number of compute units / SMs (estimated)
    pub compute_units: u32,
    /// Maximum supported workgroup (thread block) size
    pub max_workgroupsize: u32,
    /// Backend this device belongs to
    pub backend_type: GpuBackend,
    /// Whether the device can currently be used
    pub is_available: bool,
}

/// Performance statistics for GPU operations
///
/// One instance is kept per backend; counters are updated by
/// `GpuContextPool::record_operation`.
#[derive(Debug, Clone, Default)]
pub struct GpuPerformanceStats {
    /// All recorded operations, successful or not
    pub total_operations: u64,
    /// Operations that completed successfully
    pub successful_operations: u64,
    /// Operations that failed
    pub failed_operations: u64,
    /// Accumulated execution time of successful operations
    pub total_execution_time: Duration,
    /// total_execution_time / successful_operations
    pub average_execution_time: Duration,
    /// Number of host<->device transfers recorded
    pub memory_transfers: u64,
    /// Total bytes transferred across all successful operations
    pub total_data_transferred: u64,
    /// Highest observed memory usage in bytes
    pub peak_memory_usage: u64,
    /// Fraction of cache hits in [0, 1]
    pub cache_hit_rate: f64,
    /// Message of the most recent failure, if any
    pub last_error_message: Option<String>,
    /// Derived throughput (successful operations per second)
    pub operations_per_second: f64,
}

/// Production configuration for GPU operations
#[derive(Debug, Clone)]
pub struct GpuProductionConfig {
    /// Minimum array size to consider GPU acceleration (default: 1000)
    pub min_gpu_arraysize: usize,
    /// Maximum GPU memory usage percentage (default: 80%)
    pub max_memory_usage_percent: f32,
    /// Enable automatic GPU/CPU switching based on performance (default: true)
    pub enable_adaptive_switching: bool,
    /// GPU warmup iterations for performance measurement (default: 3)
    pub warmup_iterations: u32,
    /// Maximum number of retry attempts for failed GPU operations (default: 3)
    pub max_retry_attempts: u32,
    /// Enable performance profiling and logging (default: false)
    pub enable_profiling: bool,
    /// Preferred GPU backend type (default: `GpuBackend::Cpu`, per the
    /// `Default` impl below — an earlier comment said "Auto")
    pub preferred_backend: GpuBackend,
}

59impl Default for GpuProductionConfig {
60    fn default() -> Self {
61        Self {
62            min_gpu_arraysize: 1000,
63            max_memory_usage_percent: 80.0,
64            enable_adaptive_switching: true,
65            warmup_iterations: 3,
66            max_retry_attempts: 3,
67            enable_profiling: false,
68            preferred_backend: GpuBackend::Cpu,
69        }
70    }
71}
72
/// GPU context pool for managing multiple contexts
///
/// All fields are interior-mutable so the pool can be shared behind a
/// `&'static` reference (see `get_gpu_pool`).
pub struct GpuContextPool {
    /// One live context per backend, created by `create_contexts`
    contexts: RwLock<HashMap<GpuBackend, Arc<GpuContext>>>,
    /// Discovered device capabilities per backend
    device_info: RwLock<HashMap<GpuBackend, GpuDeviceInfo>>,
    /// Per-backend operation statistics
    performance_stats: RwLock<HashMap<GpuBackend, GpuPerformanceStats>>,
    /// Consecutive-failure count after which GPU use is abandoned
    fallback_threshold: Mutex<usize>,
    /// Master switch consulted by `should_use_gpu`
    auto_fallback_enabled: Mutex<bool>,
    /// Tunable production settings
    production_config: RwLock<GpuProductionConfig>,
    /// Per-backend memory usage in bytes
    /// NOTE(review): never read or written in this file — confirm it is used
    /// elsewhere or remove it.
    memory_usage_tracker: RwLock<HashMap<GpuBackend, u64>>,
}

84impl Default for GpuContextPool {
85    fn default() -> Self {
86        Self::new()
87    }
88}
89
90impl GpuContextPool {
91    /// Create a new GPU context pool with production configuration
92    pub fn new() -> Self {
93        Self {
94            contexts: RwLock::new(HashMap::new()),
95            device_info: RwLock::new(HashMap::new()),
96            performance_stats: RwLock::new(HashMap::new()),
97            fallback_threshold: Mutex::new(5), // Fall back after 5 consecutive failures
98            auto_fallback_enabled: Mutex::new(true),
99            production_config: RwLock::new(GpuProductionConfig::default()),
100            memory_usage_tracker: RwLock::new(HashMap::new()),
101        }
102    }
103
104    /// Create a new GPU context pool with custom production configuration
105    pub fn with_config(config: GpuProductionConfig) -> Self {
106        Self {
107            contexts: RwLock::new(HashMap::new()),
108            device_info: RwLock::new(HashMap::new()),
109            performance_stats: RwLock::new(HashMap::new()),
110            fallback_threshold: Mutex::new(config.max_retry_attempts as usize),
111            auto_fallback_enabled: Mutex::new(config.enable_adaptive_switching),
112            production_config: RwLock::new(config),
113            memory_usage_tracker: RwLock::new(HashMap::new()),
114        }
115    }
116
117    /// Update production configuration
118    pub fn update_config(&self, config: GpuProductionConfig) {
119        *self.production_config.write().expect("Operation failed") = config;
120    }
121
122    /// Get current production configuration
123    pub fn get_config(&self) -> GpuProductionConfig {
124        self.production_config
125            .read()
126            .expect("Operation failed")
127            .clone()
128    }
129
130    /// Initialize GPU context pool with device discovery
131    pub fn initialize(&self) -> SpecialResult<()> {
132        self.discover_devices()?;
133        self.create_contexts()?;
134        Ok(())
135    }
136
137    /// Discover available GPU devices
138    fn discover_devices(&self) -> SpecialResult<()> {
139        let mut device_info = self.device_info.write().expect("Operation failed");
140
141        // Try to discover WebGPU devices
142        if let Ok(info) = self.probe_webgpu_device() {
143            device_info.insert(GpuBackend::Wgpu, info);
144        }
145
146        // Try to discover OpenCL devices
147        if let Ok(info) = self.probe_opencl_device() {
148            device_info.insert(GpuBackend::OpenCL, info);
149        }
150
151        // Try to discover CUDA devices
152        if let Ok(info) = self.probe_cuda_device() {
153            device_info.insert(GpuBackend::Cuda, info);
154        }
155
156        if device_info.is_empty() {
157            #[cfg(feature = "gpu")]
158            log::warn!("No GPU devices discovered");
159        } else {
160            #[cfg(feature = "gpu")]
161            log::info!("Discovered {} GPU device(s)", device_info.len());
162        }
163
164        Ok(())
165    }
166
167    /// Probe WebGPU device capabilities
168    fn probe_webgpu_device(&self) -> SpecialResult<GpuDeviceInfo> {
169        // use scirs2_core::gpu;
170
171        match GpuContext::new(GpuBackend::Wgpu) {
172            Ok(_context) => {
173                let info = GpuDeviceInfo {
174                    device_id: 0,
175                    device_name: "WebGPU Device".to_string(),
176                    memorysize: 1024 * 1024 * 1024, // Assume 1GB
177                    compute_units: 32,              // Conservative estimate
178                    max_workgroupsize: 256,
179                    backend_type: GpuBackend::Wgpu,
180                    is_available: true,
181                };
182
183                #[cfg(feature = "gpu")]
184                log::info!("WebGPU device available: {}", info.device_name);
185
186                Ok(info)
187            }
188            Err(e) => {
189                #[cfg(feature = "gpu")]
190                log::debug!("WebGPU not available: {}", e);
191                Err(SpecialError::GpuNotAvailable(
192                    "WebGPU not available".to_string(),
193                ))
194            }
195        }
196    }
197
198    /// Probe OpenCL device capabilities with advanced detection
199    fn probe_opencl_device(&self) -> SpecialResult<GpuDeviceInfo> {
200        // use scirs2_core::gpu;
201
202        #[cfg(feature = "gpu")]
203        log::debug!("Probing OpenCL devices...");
204
205        // Try to create OpenCL context to test availability
206        match GpuContext::new(GpuBackend::OpenCL) {
207            Ok(context) => {
208                // Query OpenCL device properties if possible
209                let info = self
210                    .query_opencl_device_info(&std::sync::Arc::new(context))
211                    .unwrap_or_else(|_| {
212                        // Fallback to conservative defaults
213                        GpuDeviceInfo {
214                            device_id: 0,
215                            device_name: "OpenCL Device".to_string(),
216                            memorysize: 2 * 1024 * 1024 * 1024, // 2GB assumption
217                            compute_units: 16,                  // Conservative estimate
218                            max_workgroupsize: 256,
219                            backend_type: GpuBackend::OpenCL,
220                            is_available: true,
221                        }
222                    });
223
224                #[cfg(feature = "gpu")]
225                log::info!(
226                    "OpenCL device available: {} with {} compute units",
227                    info.device_name,
228                    info.compute_units
229                );
230
231                Ok(info)
232            }
233            Err(e) => {
234                #[cfg(feature = "gpu")]
235                log::debug!("OpenCL not available: {}", e);
236                Err(SpecialError::GpuNotAvailable(format!(
237                    "OpenCL not available: {}",
238                    e
239                )))
240            }
241        }
242    }
243
244    /// Probe CUDA device capabilities with NVIDIA GPU detection
245    fn probe_cuda_device(&self) -> SpecialResult<GpuDeviceInfo> {
246        // use scirs2_core::gpu;
247
248        #[cfg(feature = "gpu")]
249        log::debug!("Probing CUDA devices...");
250
251        // Try to create CUDA context to test availability
252        match GpuContext::new(GpuBackend::Cuda) {
253            Ok(context) => {
254                // Query CUDA device properties if possible
255                let info = self
256                    .query_cuda_device_info(&std::sync::Arc::new(context))
257                    .unwrap_or_else(|_| {
258                        // Fallback to conservative defaults for CUDA
259                        GpuDeviceInfo {
260                            device_id: 0,
261                            device_name: "NVIDIA CUDA Device".to_string(),
262                            memorysize: 4 * 1024 * 1024 * 1024, // 4GB assumption for CUDA
263                            compute_units: 64,                  // Higher for CUDA devices
264                            max_workgroupsize: 1024,            // CUDA supports larger workgroups
265                            backend_type: GpuBackend::Cuda,
266                            is_available: true,
267                        }
268                    });
269
270                #[cfg(feature = "gpu")]
271                log::info!(
272                    "CUDA device available: {} with {} SMs",
273                    info.device_name,
274                    info.compute_units
275                );
276
277                Ok(info)
278            }
279            Err(e) => {
280                #[cfg(feature = "gpu")]
281                log::debug!("CUDA not available: {}", e);
282                Err(SpecialError::GpuNotAvailable(format!(
283                    "CUDA not available: {}",
284                    e
285                )))
286            }
287        }
288    }
289
290    /// Create GPU contexts for discovered devices
291    fn create_contexts(&self) -> SpecialResult<()> {
292        let device_info = self.device_info.read().expect("Operation failed");
293        let mut contexts = self.contexts.write().expect("Operation failed");
294        let mut stats = self.performance_stats.write().expect("Operation failed");
295
296        for (&backend_type, info) in device_info.iter() {
297            if info.is_available {
298                match GpuContext::new(backend_type) {
299                    Ok(context) => {
300                        contexts.insert(backend_type, std::sync::Arc::new(context));
301                        stats.insert(backend_type, GpuPerformanceStats::default());
302
303                        #[cfg(feature = "gpu")]
304                        log::info!("Created GPU context for {:?}", backend_type);
305                    }
306                    Err(e) => {
307                        #[cfg(feature = "gpu")]
308                        log::warn!("Failed to create context for {:?}: {}", backend_type, e);
309                    }
310                }
311            }
312        }
313
314        Ok(())
315    }
316
317    /// Get the best available GPU context
318    pub fn get_best_context(&self) -> SpecialResult<Arc<GpuContext>> {
319        let contexts = self.contexts.read().expect("Operation failed");
320        let stats = self.performance_stats.read().expect("Operation failed");
321
322        // Prioritize based on performance stats and backend type
323        let preferred_order = [GpuBackend::Cuda, GpuBackend::Wgpu, GpuBackend::OpenCL];
324
325        for &backend_type in &preferred_order {
326            if let Some(context) = contexts.get(&backend_type) {
327                if let Some(stat) = stats.get(&backend_type) {
328                    // Check if context is healthy (success rate > 80%)
329                    let success_rate = if stat.total_operations > 0 {
330                        stat.successful_operations as f64 / stat.total_operations as f64
331                    } else {
332                        1.0 // No operations yet, assume healthy
333                    };
334
335                    if success_rate > 0.8 {
336                        #[cfg(feature = "gpu")]
337                        log::debug!(
338                            "Using {:?} context (success rate: {:.1}%)",
339                            backend_type,
340                            success_rate * 100.0
341                        );
342                        return Ok(Arc::clone(context));
343                    }
344                }
345            }
346        }
347
348        Err(SpecialError::GpuNotAvailable(
349            "No healthy GPU contexts available".to_string(),
350        ))
351    }
352
353    /// Record operation performance
354    pub fn record_operation(
355        &self,
356        backend_type: GpuBackend,
357        execution_time: Duration,
358        success: bool,
359        datasize: usize,
360    ) {
361        let mut stats = self.performance_stats.write().expect("Operation failed");
362        if let Some(stat) = stats.get_mut(&backend_type) {
363            stat.total_operations += 1;
364
365            if success {
366                stat.successful_operations += 1;
367                stat.total_execution_time += execution_time;
368                stat.average_execution_time =
369                    stat.total_execution_time / stat.successful_operations as u32;
370                stat.total_data_transferred += datasize as u64;
371            } else {
372                stat.failed_operations += 1;
373            }
374
375            stat.memory_transfers += 1;
376        }
377    }
378
379    /// Get performance statistics for a backend
380    pub fn get_performance_stats(&self, backendtype: GpuBackend) -> Option<GpuPerformanceStats> {
381        let stats = self.performance_stats.read().expect("Operation failed");
382        stats.get(&backendtype).cloned()
383    }
384
385    /// Get all available device information
386    pub fn get_device_info(&self) -> HashMap<GpuBackend, GpuDeviceInfo> {
387        self.device_info.read().expect("Operation failed").clone()
388    }
389
390    /// Check if GPU acceleration should be used for given array size
391    pub fn should_use_gpu(&self, arraysize: usize, data_typesize: usize) -> bool {
392        // Only use GPU for sufficiently large arrays
393        let min_elements = match data_typesize {
394            4 => 512,  // f32
395            8 => 256,  // f64
396            _ => 1024, // other types
397        };
398
399        if arraysize < min_elements {
400            return false;
401        }
402
403        // Check if auto fallback is enabled and we have healthy contexts
404        let auto_fallback = *self.auto_fallback_enabled.lock().expect("Operation failed");
405        if !auto_fallback {
406            return false;
407        }
408
409        // Check if we have any available contexts
410        let contexts = self.contexts.read().expect("Operation failed");
411        !contexts.is_empty()
412    }
413
414    /// Enable or disable automatic fallback to CPU
415    pub fn set_auto_fallback(&self, enabled: bool) {
416        *self.auto_fallback_enabled.lock().expect("Operation failed") = enabled;
417    }
418
419    /// Set the threshold for fallback after consecutive failures
420    pub fn set_fallback_threshold(&self, threshold: usize) {
421        *self.fallback_threshold.lock().expect("Operation failed") = threshold;
422    }
423
424    /// Query OpenCL device information with detailed properties
425    fn query_opencl_device_info(&self, context: &Arc<GpuContext>) -> SpecialResult<GpuDeviceInfo> {
426        #[cfg(feature = "gpu")]
427        log::debug!("Querying OpenCL device properties...");
428
429        let estimated_memory = 2 * 1024 * 1024 * 1024; // 2GB default
430        let estimated_compute_units = 16; // Default estimate
431
432        Ok(GpuDeviceInfo {
433            device_id: 0,
434            device_name: format!("OpenCL GPU Device (Unknown)"),
435            memorysize: estimated_memory,
436            compute_units: estimated_compute_units,
437            max_workgroupsize: 256,
438            backend_type: GpuBackend::OpenCL,
439            is_available: true,
440        })
441    }
442
443    /// Query CUDA device information with detailed properties
444    fn query_cuda_device_info(&self, context: &Arc<GpuContext>) -> SpecialResult<GpuDeviceInfo> {
445        #[cfg(feature = "gpu")]
446        log::debug!("Querying CUDA device properties...");
447
448        let estimated_memory = 4 * 1024 * 1024 * 1024; // 4GB default
449        let estimated_compute_units = 64; // Default estimate
450
451        Ok(GpuDeviceInfo {
452            device_id: 0,
453            device_name: format!("NVIDIA CUDA Device (Unknown)"),
454            memorysize: estimated_memory,
455            compute_units: estimated_compute_units,
456            max_workgroupsize: 1024,
457            backend_type: GpuBackend::Cuda,
458            is_available: true,
459        })
460    }
461
462    /// Helper functions for device estimation
463    fn estimate_gpu_memory_opencl(&self) -> u64 {
464        2 * 1024 * 1024 * 1024
465    }
466    fn estimate_gpu_memory_cuda(&self) -> u64 {
467        4 * 1024 * 1024 * 1024
468    }
469    fn estimate_compute_units_opencl(&self) -> u32 {
470        32
471    }
472    fn estimate_compute_units_cuda(&self) -> u32 {
473        64
474    }
475    fn detect_gpu_vendor(&self) -> String {
476        "Unknown Vendor".to_string()
477    }
478    fn detect_nvidia_architecture(&self) -> String {
479        "Unknown Architecture".to_string()
480    }
481    fn get_system_memorysize(&self) -> u64 {
482        8 * 1024 * 1024 * 1024
483    }
484    fn is_likely_integrated_gpu(&self) -> bool {
485        false
486    }
487
488    /// Advanced performance monitoring with trend analysis
489    pub fn get_performance_trends(&self) -> HashMap<GpuBackend, String> {
490        let stats = self.performance_stats.read().expect("Operation failed");
491        let mut trends = HashMap::new();
492
493        for (&backend_type, stat) in stats.iter() {
494            let trend_analysis = if stat.total_operations > 10 {
495                let success_rate = stat.successful_operations as f64 / stat.total_operations as f64;
496                let avg_throughput = if stat.average_execution_time.as_millis() > 0 {
497                    1000.0 / stat.average_execution_time.as_millis() as f64
498                } else {
499                    0.0
500                };
501
502                format!(
503                    "Success: {:.1}%, Throughput: {:.1} ops/sec, Data: {} MB",
504                    success_rate * 100.0,
505                    avg_throughput,
506                    stat.total_data_transferred / 1024 / 1024
507                )
508            } else {
509                "Insufficient data for trend analysis".to_string()
510            };
511            trends.insert(backend_type, trend_analysis);
512        }
513        trends
514    }
515
516    /// Clear performance statistics
517    pub fn reset_performance_stats(&self) {
518        let mut stats = self.performance_stats.write().expect("Operation failed");
519        for stat in stats.values_mut() {
520            *stat = GpuPerformanceStats::default();
521        }
522        #[cfg(feature = "gpu")]
523        log::info!("GPU performance statistics reset");
524    }
525
526    /// Get all performance statistics
527    pub fn get_performance_stats_all(&self) -> HashMap<GpuBackend, GpuPerformanceStats> {
528        self.performance_stats
529            .read()
530            .expect("Operation failed")
531            .clone()
532    }
533
534    /// Get comprehensive system report"
535    pub fn get_system_report(&self) -> String {
536        let device_info = self.device_info.read().expect("Operation failed");
537        let stats = self.performance_stats.read().expect("Operation failed");
538
539        let mut report = String::new();
540        report.push_str("=== GPU System Report ===\n\n");
541
542        if device_info.is_empty() {
543            report.push_str("No GPU devices available.\n");
544        } else {
545            report.push_str(&format!("Found {} GPU device(s):\n\n", device_info.len()));
546
547            for (backend_type, info) in device_info.iter() {
548                report.push_str(&format!("Backend: {:?}\n", backend_type));
549                report.push_str(&format!("  Device: {}\n", info.device_name));
550                report.push_str(&format!("  Memory: {} MB\n", info.memorysize / 1024 / 1024));
551                report.push_str(&format!("  Compute Units: {}\n", info.compute_units));
552                report.push_str(&format!(
553                    "  Max Workgroup Size: {}\n",
554                    info.max_workgroupsize
555                ));
556                report.push_str(&format!("  Available: {}\n", info.is_available));
557
558                if let Some(stat) = stats.get(backend_type) {
559                    if stat.total_operations > 0 {
560                        let success_rate =
561                            stat.successful_operations as f64 / stat.total_operations as f64;
562                        report.push_str(&format!("  Success Rate: {:.1}%\n", success_rate * 100.0));
563                        report.push_str(&format!(
564                            "  Avg Execution Time: {:?}\n",
565                            stat.average_execution_time
566                        ));
567                        report.push_str(&format!(
568                            "  Total Data Transferred: {} MB\n",
569                            stat.total_data_transferred / 1024 / 1024
570                        ));
571                    } else {
572                        report.push_str("  No operations recorded\n");
573                    }
574                }
575                report.push('\n');
576            }
577        }
578
579        report
580    }
581}
582
/// Global GPU context pool instance
static GPU_POOL: std::sync::OnceLock<GpuContextPool> = std::sync::OnceLock::new();

/// Get the global GPU context pool, creating and initializing it on first use.
///
/// Initialization failure is non-fatal: the warning is logged (when the `gpu`
/// feature is on) and the empty pool is returned, so callers simply take the
/// CPU path.
#[allow(dead_code)]
pub fn get_gpu_pool() -> &'static GpuContextPool {
    GPU_POOL.get_or_init(|| {
        let pool = GpuContextPool::new();
        if let Err(e) = pool.initialize() {
            #[cfg(feature = "gpu")]
            log::warn!("Failed to initialize GPU pool: {}", e);
        }
        pool
    })
}

599/// Initialize the global GPU context pool
600#[allow(dead_code)]
601pub fn initialize_gpu_system() -> SpecialResult<()> {
602    let pool = get_gpu_pool();
603    pool.initialize()
604}
605
606/// Get the best available GPU context from the global pool
607#[allow(dead_code)]
608pub fn get_best_gpu_context() -> SpecialResult<Arc<GpuContext>> {
609    get_gpu_pool().get_best_context()
610}
611
612/// Check if GPU should be used for computation
613#[allow(dead_code)]
614pub fn should_use_gpu_computation(_arraysize: usize, elementsize: usize) -> bool {
615    get_gpu_pool().should_use_gpu(_arraysize, elementsize)
616}
617
618/// Record GPU operation performance
619#[allow(dead_code)]
620pub fn record_gpu_performance(
621    backend_type: GpuBackend,
622    execution_time: Duration,
623    success: bool,
624    datasize: usize,
625) {
626    get_gpu_pool().record_operation(backend_type, execution_time, success, datasize);
627}
628
/// Validate GPU infrastructure for production use
///
/// Builds a human-readable readiness report covering device availability,
/// per-device memory/compute capacity, recorded performance trends, the
/// active configuration and deployment recommendations. Always returns `Ok`;
/// missing GPUs are reported in the text, not as an error.
#[allow(dead_code)]
pub fn validate_gpu_production_readiness() -> SpecialResult<String> {
    let pool = get_gpu_pool();
    let mut validation_report = String::new();

    // Check device availability
    let device_info = pool.get_device_info();
    if device_info.is_empty() {
        validation_report.push_str("⚠️  No GPU devices detected\n");
        validation_report.push_str("   Recommendation: GPU features will use CPU fallback\n\n");
    } else {
        validation_report.push_str(&format!(
            "✅ {} GPU device(s) available\n",
            device_info.len()
        ));

        // Check memory capacity
        for (backend, info) in device_info.iter() {
            let memory_gb = info.memorysize as f64 / (1024.0 * 1024.0 * 1024.0);
            validation_report.push_str(&format!(
                "   {:?}: {:.1} GB memory, {} compute units\n",
                backend, memory_gb, info.compute_units
            ));

            // Flag devices too small for large array workloads.
            if memory_gb < 2.0 {
                validation_report
                    .push_str("   ⚠️  Low GPU memory may limit large array processing\n");
            }
        }
        validation_report.push('\n');
    }

    // Check performance history
    let performance_trends = pool.get_performance_trends();
    if !performance_trends.is_empty() {
        validation_report.push_str("📊 Performance History:\n");
        for (backend, trend) in performance_trends {
            validation_report.push_str(&format!("   {:?}: {}\n", backend, trend));
        }
        validation_report.push('\n');
    }

    // Configuration validation
    let config = pool.get_config();
    validation_report.push_str("⚙️  Configuration:\n");
    validation_report.push_str(&format!(
        "   Min array size for GPU: {}\n",
        config.min_gpu_arraysize
    ));
    validation_report.push_str(&format!(
        "   Max memory usage: {:.0}%\n",
        config.max_memory_usage_percent
    ));
    validation_report.push_str(&format!(
        "   Adaptive switching: {}\n",
        config.enable_adaptive_switching
    ));
    validation_report.push_str(&format!(
        "   Preferred backend: {:?}\n",
        config.preferred_backend
    ));

    // Recommendations
    validation_report.push_str("\n🎯 Recommendations:\n");
    if device_info.is_empty() {
        validation_report.push_str("   • Install GPU drivers for acceleration\n");
        validation_report.push_str("   • Enable GPU features in scirs2-core\n");
    } else {
        validation_report.push_str("   • GPU infrastructure ready for production use\n");
        validation_report.push_str("   • Monitor performance with get_system_report()\n");
        validation_report.push_str("   • Adjust min_gpu_arraysize based on workload\n");
    }

    Ok(validation_report)
}

706/// Enable production monitoring with performance alerts
707#[allow(dead_code)]
708pub fn enable_gpu_monitoring(_enablealerts: bool) -> SpecialResult<()> {
709    let pool = get_gpu_pool();
710    let mut config = pool.get_config();
711    config.enable_profiling = true;
712    pool.update_config(config);
713
714    #[cfg(feature = "gpu")]
715    {
716        if _enablealerts {
717            log::info!("GPU performance monitoring enabled with alerts");
718        } else {
719            log::info!("GPU performance monitoring enabled without alerts");
720        }
721    }
722
723    Ok(())
724}
725
/// Get GPU resource utilization report
///
/// Builds a per-backend summary of peak memory usage, success rate,
/// throughput and cache hit rate from the global pool's recorded statistics.
/// Backends without a stats entry are omitted.
#[allow(dead_code)]
pub fn get_gpu_resource_utilization() -> String {
    let pool = get_gpu_pool();
    let device_info = pool.get_device_info();
    let stats = pool.get_performance_stats_all();

    let mut report = String::new();
    report.push_str("=== GPU Resource Utilization ===\n");

    for (backend, info) in device_info.iter() {
        if let Some(stat) = stats.get(backend) {
            // Peak usage as a percentage of the (estimated) device memory.
            let memory_usage = (stat.peak_memory_usage as f64 / info.memorysize as f64) * 100.0;
            let efficiency = if stat.total_operations > 0 {
                (stat.successful_operations as f64 / stat.total_operations as f64) * 100.0
            } else {
                0.0
            };

            report.push_str(&format!("\n{:?}:\n", backend));
            report.push_str(&format!("  Peak Memory Usage: {:.1}%\n", memory_usage));
            report.push_str(&format!("  Success Rate: {:.1}%\n", efficiency));
            report.push_str(&format!(
                "  Operations/sec: {:.1}\n",
                stat.operations_per_second
            ));
            report.push_str(&format!(
                "  Cache Hit Rate: {:.1}%\n",
                stat.cache_hit_rate * 100.0
            ));

            if let Some(ref error) = stat.last_error_message {
                report.push_str(&format!("  Last Error: {}\n", error));
            }
        }
    }

    report
}

#[cfg(test)]
mod tests {
    use super::*;

    // A fresh pool has discovered no devices until initialize() runs.
    #[test]
    fn test_gpu_pool_creation() {
        let pool = GpuContextPool::new();
        assert!(pool.get_device_info().is_empty());
    }

    #[test]
    fn test_should_use_gpu_logic() {
        let pool = GpuContextPool::new();

        // Small arrays should not use GPU
        assert!(!pool.should_use_gpu(100, 4));

        // Large arrays might use GPU (depends on availability)
        // This test doesn't guarantee GPU availability, so we just check the logic
        let use_large_f32 = pool.should_use_gpu(1000, 4);
        let use_large_f64 = pool.should_use_gpu(1000, 8);

        // Results depend on GPU availability, but the calls should not panic
        // Just ensuring the variables are used (the fact that test runs means no panic)
        let _ = (use_large_f32, use_large_f64);
    }

    #[test]
    fn test_performance_stats() {
        let pool = GpuContextPool::new();
        let backend = GpuBackend::Wgpu;

        // Initial stats should be None (no context created)
        assert!(pool.get_performance_stats(backend).is_none());

        // After initialization, stats might be available
        let _ = pool.initialize();
        // Note: We can't guarantee GPU availability in tests
    }
}
805}