/// Tunable settings for GPU execution.
///
/// Construct it with [`Default::default`] or one of the presets
/// ([`GpuConfig::performance`], [`GpuConfig::debug`],
/// [`GpuConfig::memory_efficient`]) and check it with [`GpuConfig::validate`].
///
/// `PartialEq` is derived so that configurations can be compared directly
/// (for example in tests, or to detect that a setting changed).
#[derive(Debug, Clone, PartialEq)]
pub struct GpuConfig {
    /// Identifier of the GPU device to use. Defaults to `0`.
    pub device_id: i32,
    /// Whether mixed precision is enabled. Defaults to `true`.
    pub enable_mixed_precision: bool,
    /// Whether tensor cores are enabled. Defaults to `true`.
    pub enable_tensor_cores: bool,
    /// Batch size. Defaults to `1024`; `validate` rejects `0`.
    pub batch_size: usize,
    /// Size of the memory pool (presumably in bytes — confirm with the
    /// consumer). Defaults to `1024 * 1024 * 1024`; `validate` rejects values
    /// below `1024 * 1024`.
    pub memory_pool_size: usize,
    /// Number of streams. Defaults to `4`; `validate` rejects `0`.
    pub stream_count: usize,
    /// Whether peer access is enabled. Defaults to `false`.
    pub enable_peer_access: bool,
    /// Whether unified memory is enabled. Defaults to `false`.
    pub enable_unified_memory: bool,
    /// Whether asynchronous execution is enabled. Defaults to `true`.
    pub enable_async_execution: bool,
    /// Whether multi-GPU operation is enabled. Defaults to `false`.
    pub enable_multi_gpu: bool,
    /// Preferred GPU identifiers. Defaults to `[0]`; `validate` rejects an
    /// empty list.
    pub preferred_gpu_ids: Vec<i32>,
    /// Whether the batch size may be adjusted dynamically. Defaults to `true`.
    pub dynamic_batch_sizing: bool,
    /// Whether memory compression is enabled. Defaults to `false`.
    pub enable_memory_compression: bool,
    /// Size of the kernel cache (unit not visible here — presumably a number
    /// of entries; confirm). Defaults to `100`; `validate` rejects `0`.
    pub kernel_cache_size: usize,
    /// Optimization level. Defaults to [`GpuOptimization::Balanced`].
    pub optimization_level: GpuOptimization,
    /// Numeric precision mode. Defaults to [`GpuPrecision::FP32`].
    pub precision_mode: GpuPrecision,
}
/// Optimization level selected for GPU execution.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so the level can be used
/// as a key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GpuOptimization {
    /// Level `0`; `debug_enabled()` returns `true`.
    Debug,
    /// Level `1`; `debug_enabled()` returns `true`.
    Balanced,
    /// Level `2`; `debug_enabled()` returns `false`.
    Performance,
    /// Level `3`; `debug_enabled()` returns `false`.
    Extreme,
}
/// Numeric precision mode used for GPU computation.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so the mode can be used
/// as a key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GpuPrecision {
    /// `bytes_per_element()` reports 4; not reported as tensor-core capable.
    FP32,
    /// `bytes_per_element()` reports 2; reported as tensor-core capable.
    FP16,
    /// `bytes_per_element()` reports 4; reported as tensor-core capable.
    Mixed,
    /// `bytes_per_element()` reports 1; not reported as tensor-core capable.
    INT8,
    /// `bytes_per_element()` reports 4; not reported as tensor-core capable.
    Adaptive,
}
impl Default for GpuConfig {
fn default() -> Self {
Self {
device_id: 0,
enable_mixed_precision: true,
enable_tensor_cores: true,
batch_size: 1024,
memory_pool_size: 1024 * 1024 * 1024, stream_count: 4,
enable_peer_access: false,
enable_unified_memory: false,
enable_async_execution: true,
enable_multi_gpu: false,
preferred_gpu_ids: vec![0],
dynamic_batch_sizing: true,
enable_memory_compression: false,
kernel_cache_size: 100, optimization_level: GpuOptimization::Balanced,
precision_mode: GpuPrecision::FP32,
}
}
}
impl GpuConfig {
    /// Preset with `Performance` optimization and a batch size of 2048.
    ///
    /// Mixed precision, tensor cores, asynchronous execution and dynamic
    /// batch sizing are switched on; every other field keeps its default.
    pub fn performance() -> Self {
        let base = Self::default();
        Self {
            batch_size: 2048,
            optimization_level: GpuOptimization::Performance,
            enable_mixed_precision: true,
            enable_tensor_cores: true,
            enable_async_execution: true,
            dynamic_batch_sizing: true,
            ..base
        }
    }

    /// Preset with `Debug` optimization and a batch size of 128.
    ///
    /// Mixed precision, tensor cores, asynchronous execution and dynamic
    /// batch sizing are switched off; every other field keeps its default.
    pub fn debug() -> Self {
        let base = Self::default();
        Self {
            batch_size: 128,
            optimization_level: GpuOptimization::Debug,
            enable_mixed_precision: false,
            enable_tensor_cores: false,
            enable_async_execution: false,
            dynamic_batch_sizing: false,
            ..base
        }
    }

    /// Preset with memory compression on, a `512 * 1024 * 1024` memory pool,
    /// `FP16` precision and a batch size of 512.
    ///
    /// Every other field keeps its default.
    pub fn memory_efficient() -> Self {
        let base = Self::default();
        Self {
            batch_size: 512,
            precision_mode: GpuPrecision::FP16,
            memory_pool_size: 512 * 1024 * 1024,
            enable_memory_compression: true,
            ..base
        }
    }

    /// Checks the configuration for values that can never be valid.
    ///
    /// # Errors
    ///
    /// Returns the message of the first failing rule, tested in this order:
    ///
    /// 1. `batch_size` is `0`
    /// 2. `memory_pool_size` is below `1024 * 1024`
    /// 3. `stream_count` is `0`
    /// 4. `kernel_cache_size` is `0`
    /// 5. `preferred_gpu_ids` is empty
    pub fn validate(&self) -> Result<(), String> {
        const MIN_POOL_SIZE: usize = 1024 * 1024;

        // Each entry pairs "is this rule violated?" with the message to report.
        // Array order is the reporting priority.
        let rules = [
            (self.batch_size == 0, "Batch size must be greater than 0"),
            (
                self.memory_pool_size < MIN_POOL_SIZE,
                "Memory pool size must be at least 1MB",
            ),
            (self.stream_count == 0, "Stream count must be greater than 0"),
            (
                self.kernel_cache_size == 0,
                "Kernel cache size must be greater than 0",
            ),
            (
                self.preferred_gpu_ids.is_empty(),
                "At least one preferred GPU ID must be specified",
            ),
        ];

        let first_violation = rules.iter().find(|rule| rule.0).map(|rule| rule.1);
        match first_violation {
            Some(message) => Err(message.to_string()),
            None => Ok(()),
        }
    }
}
impl GpuOptimization {
pub fn level(&self) -> u32 {
match self {
GpuOptimization::Debug => 0,
GpuOptimization::Balanced => 1,
GpuOptimization::Performance => 2,
GpuOptimization::Extreme => 3,
}
}
pub fn debug_enabled(&self) -> bool {
matches!(self, GpuOptimization::Debug | GpuOptimization::Balanced)
}
}
impl GpuPrecision {
pub fn bytes_per_element(&self) -> usize {
match self {
GpuPrecision::FP32 => 4,
GpuPrecision::FP16 => 2,
GpuPrecision::Mixed => 4, GpuPrecision::INT8 => 1,
GpuPrecision::Adaptive => 4, }
}
pub fn supports_tensor_cores(&self) -> bool {
matches!(self, GpuPrecision::FP16 | GpuPrecision::Mixed)
}
}