pub mod backend_management;
pub mod compression;
pub mod memory_management;
pub use backend_management::{
BackendCapabilities, BackendPerformanceProfile, GpuIoProcessor, GpuWorkloadType,
};
pub use compression::{CompressionStats, GpuCompressionProcessor};
pub use memory_management::{
AdvancedGpuMemoryPool, AllocationStats, BufferMetadata, FragmentationManager, GlobalPoolStats,
GpuMemoryPoolManager, MemoryType, PoolConfig, PoolStats, PooledBuffer,
};
use crate::error::Result;
use scirs2_core::gpu::{GpuBackend, GpuDataType, GpuDevice};
use scirs2_core::ndarray::{Array1, ArrayView1};
#[derive(Debug)]
pub struct UnifiedGpuProcessor {
io_processor: GpuIoProcessor,
compression_processor: GpuCompressionProcessor,
memory_manager: GpuMemoryPoolManager,
}
impl UnifiedGpuProcessor {
pub fn new() -> Result<Self> {
let io_processor = GpuIoProcessor::new()?;
let compression_processor = GpuCompressionProcessor::new()?;
let memory_manager = GpuMemoryPoolManager::new(io_processor.device.clone())?;
Ok(Self {
io_processor,
compression_processor,
memory_manager,
})
}
pub fn with_backend(backend: GpuBackend) -> Result<Self> {
let io_processor = GpuIoProcessor::with_backend(backend)?;
let compression_processor = GpuCompressionProcessor::new()?;
let memory_manager = GpuMemoryPoolManager::new(io_processor.device.clone())?;
Ok(Self {
io_processor,
compression_processor,
memory_manager,
})
}
pub fn backend(&self) -> GpuBackend {
self.io_processor.backend()
}
pub fn get_capabilities(&self) -> Result<GpuCapabilities> {
let backend_caps = self.io_processor.get_backend_capabilities()?;
let compression_stats = self.compression_processor.get_performance_stats();
let memory_stats = self.memory_manager.get_global_stats();
Ok(GpuCapabilities {
backend: backend_caps.backend,
memory_gb: backend_caps.memory_gb,
compute_units: backend_caps.compute_units,
supports_fp64: backend_caps.supports_fp64,
supports_fp16: backend_caps.supports_fp16,
compression_throughput_gbps: compression_stats.estimated_throughput_gbps,
memory_pools: memory_stats.pool_count,
total_pool_size: memory_stats.total_pool_size,
performance_score: self.calculate_performance_score(&backend_caps),
})
}
pub fn compress<T: GpuDataType>(
&self,
data: &ArrayView1<T>,
algorithm: crate::compression::CompressionAlgorithm,
level: Option<u32>,
) -> Result<Vec<u8>> {
self.compression_processor
.compress_gpu(data, algorithm, level)
}
pub fn decompress<T: GpuDataType>(
&self,
compressed_data: &[u8],
algorithm: crate::compression::CompressionAlgorithm,
expected_size: usize,
) -> Result<Array1<T>> {
self.compression_processor
.decompress_gpu(compressed_data, algorithm, expected_size)
}
pub fn allocate_buffer(
&mut self,
size: usize,
memory_type: MemoryType,
) -> Result<PooledBuffer> {
self.memory_manager.allocate(size, memory_type)
}
pub fn deallocate_buffer(
&mut self,
buffer: PooledBuffer,
memory_type: MemoryType,
) -> Result<()> {
self.memory_manager.deallocate(buffer, memory_type)
}
pub fn get_performance_stats(&self) -> UnifiedGpuStats {
let backend_caps = self
.io_processor
.get_backend_capabilities()
.unwrap_or_else(|_| BackendCapabilities {
backend: GpuBackend::Cpu,
memory_gb: 1.0,
max_work_group_size: 64,
supports_fp64: false,
supports_fp16: false,
compute_units: 1,
max_allocation_size: 1024 * 1024,
local_memory_size: 64 * 1024,
});
let compression_stats = self.compression_processor.get_performance_stats();
let memory_stats = self.memory_manager.get_global_stats();
UnifiedGpuStats {
backend: backend_caps.backend,
compression_stats,
memory_stats: memory_stats.clone(),
overall_efficiency: self.calculate_efficiency_score(&memory_stats),
}
}
pub fn maintenance(&mut self) -> Result<MaintenanceReport> {
let freed_buffers = self.memory_manager.garbage_collect_all()?;
Ok(MaintenanceReport {
freed_buffers,
timestamp: std::time::Instant::now(),
})
}
pub fn optimize_for_workload(&mut self, workload: GpuWorkloadType) -> Result<()> {
match workload {
GpuWorkloadType::MachineLearning => {
let pool_size = 512 * 1024 * 1024; self.memory_manager
.create_pool(pool_size, MemoryType::Device)?;
}
GpuWorkloadType::ImageProcessing => {
let pool_size = 256 * 1024 * 1024; self.memory_manager
.create_pool(pool_size, MemoryType::Unified)?;
}
GpuWorkloadType::Compression => {
let pool_size = 128 * 1024 * 1024; self.memory_manager
.create_pool(pool_size, MemoryType::Pinned)?;
}
GpuWorkloadType::GeneralCompute => {
let pool_size = 256 * 1024 * 1024; self.memory_manager
.create_pool(pool_size, MemoryType::Device)?;
}
}
Ok(())
}
fn calculate_performance_score(&self, caps: &BackendCapabilities) -> f64 {
let memory_score = (caps.memory_gb / 16.0).min(1.0); let compute_score = (caps.compute_units as f64 / 64.0).min(1.0); let feature_score = if caps.supports_fp64 && caps.supports_fp16 {
1.0
} else {
0.7
};
(memory_score + compute_score + feature_score) / 3.0
}
fn calculate_efficiency_score(&self, memory_stats: &GlobalPoolStats) -> f64 {
let allocation_efficiency = memory_stats.global_allocation_stats.get_cache_hit_rate();
let fragmentation_penalty = 1.0 - memory_stats.average_fragmentation.min(1.0);
(allocation_efficiency + fragmentation_penalty) / 2.0
}
}
impl Default for UnifiedGpuProcessor {
fn default() -> Self {
Self::new().unwrap_or_else(|_| {
let device = GpuDevice::new(GpuBackend::Cpu, 0);
Self {
io_processor: GpuIoProcessor::default(),
compression_processor: GpuCompressionProcessor::default(),
memory_manager: GpuMemoryPoolManager::new(device)
.unwrap_or_else(|_| panic!("Failed to create fallback GPU memory manager")),
}
})
}
}
#[derive(Debug, Clone)]
pub struct GpuCapabilities {
pub backend: GpuBackend,
pub memory_gb: f64,
pub compute_units: usize,
pub supports_fp64: bool,
pub supports_fp16: bool,
pub compression_throughput_gbps: f64,
pub memory_pools: usize,
pub total_pool_size: usize,
pub performance_score: f64,
}
impl GpuCapabilities {
pub fn is_hpc_capable(&self) -> bool {
self.memory_gb >= 4.0
&& self.compute_units >= 16
&& self.supports_fp64
&& self.performance_score >= 0.7
}
pub fn is_ml_capable(&self) -> bool {
self.memory_gb >= 6.0
&& self.compute_units >= 32
&& (self.supports_fp16 || self.supports_fp64)
&& self.performance_score >= 0.6
}
pub fn get_recommended_workloads(&self) -> Vec<GpuWorkloadType> {
let mut workloads = Vec::new();
if self.is_ml_capable() {
workloads.push(GpuWorkloadType::MachineLearning);
}
if self.memory_gb >= 2.0 && self.compute_units >= 8 {
workloads.push(GpuWorkloadType::ImageProcessing);
}
if self.compression_throughput_gbps >= 1.0 {
workloads.push(GpuWorkloadType::Compression);
}
workloads.push(GpuWorkloadType::GeneralCompute);
workloads
}
}
#[derive(Debug, Clone)]
pub struct UnifiedGpuStats {
pub backend: GpuBackend,
pub compression_stats: CompressionStats,
pub memory_stats: GlobalPoolStats,
pub overall_efficiency: f64,
}
#[derive(Debug)]
pub struct MaintenanceReport {
pub freed_buffers: usize,
pub timestamp: std::time::Instant,
}
pub mod utils {
use super::*;
pub fn is_gpu_available() -> bool {
GpuIoProcessor::list_available_backends()
.iter()
.any(|&backend| backend != GpuBackend::Cpu)
}
pub fn get_best_backend_for_workload(workload: GpuWorkloadType) -> Result<GpuBackend> {
GpuIoProcessor::get_optimal_backend_for_workload(workload)
}
pub fn create_optimized_processor(workload: GpuWorkloadType) -> Result<UnifiedGpuProcessor> {
let backend = get_best_backend_for_workload(workload)?;
let mut processor = UnifiedGpuProcessor::with_backend(backend)?;
processor.optimize_for_workload(workload)?;
Ok(processor)
}
pub fn benchmark_gpu_performance(processor: &UnifiedGpuProcessor) -> GpuBenchmarkResults {
let capabilities = processor
.get_capabilities()
.unwrap_or_else(|_| GpuCapabilities {
backend: GpuBackend::Cpu,
memory_gb: 1.0,
compute_units: 1,
supports_fp64: false,
supports_fp16: false,
compression_throughput_gbps: 0.1,
memory_pools: 1,
total_pool_size: 1024 * 1024,
performance_score: 0.1,
});
GpuBenchmarkResults {
backend: capabilities.backend,
memory_bandwidth_gbps: capabilities.memory_gb * 0.8, compute_throughput: capabilities.compute_units as f64 * 100.0, compression_throughput: capabilities.compression_throughput_gbps,
overall_score: capabilities.performance_score,
}
}
}
#[derive(Debug, Clone)]
pub struct GpuBenchmarkResults {
pub backend: GpuBackend,
pub memory_bandwidth_gbps: f64,
pub compute_throughput: f64,
pub compression_throughput: f64,
pub overall_score: f64,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_unified_processor_creation() {
let processor = UnifiedGpuProcessor::default();
let backend = processor.backend();
match backend {
GpuBackend::Cpu
| GpuBackend::Metal
| GpuBackend::OpenCL
| GpuBackend::Cuda
| GpuBackend::Rocm
| GpuBackend::Wgpu => {}
}
}
#[test]
fn test_gpu_availability_check() {
}
#[test]
fn test_gpu_capabilities() {
let processor = UnifiedGpuProcessor::default();
let capabilities = processor.get_capabilities();
assert!(capabilities.is_ok());
let caps = capabilities.expect("Operation failed");
assert!(caps.memory_gb > 0.0);
assert!(caps.compute_units > 0);
}
#[test]
fn test_workload_optimization() {
let mut processor = UnifiedGpuProcessor::default();
let result = processor.optimize_for_workload(GpuWorkloadType::MachineLearning);
assert!(result.is_ok());
}
#[test]
fn test_performance_benchmarking() {
let processor = UnifiedGpuProcessor::default();
let benchmark = utils::benchmark_gpu_performance(&processor);
assert!(benchmark.overall_score >= 0.0);
assert!(benchmark.memory_bandwidth_gbps >= 0.0);
}
#[test]
fn test_maintenance_operations() {
let mut processor = UnifiedGpuProcessor::default();
let report = processor.maintenance();
assert!(report.is_ok());
}
}