// oxirs_vec/gpu/config.rs

//! GPU configuration structures and enums
/// Configuration for GPU operations.
///
/// Construct via [`Default`] or one of the preset constructors
/// (`high_performance`, `memory_optimized`, `debug`), then check with
/// `validate()` before use.
#[derive(Debug, Clone)]
pub struct GpuConfig {
    /// Primary device ordinal (default 0).
    pub device_id: i32,
    /// Allow FP16/FP32 mixed-precision execution.
    pub enable_mixed_precision: bool,
    /// Use tensor cores when available.
    pub enable_tensor_cores: bool,
    /// Number of vectors processed per batch; must be > 0.
    pub batch_size: usize,
    /// Device memory pool size in bytes (default 1 GB); must be > 0.
    pub memory_pool_size: usize,
    /// Number of concurrent execution streams; must be > 0.
    pub stream_count: usize,
    /// Enable direct peer-to-peer access between GPUs.
    pub enable_peer_access: bool,
    /// Use unified (host/device shared) memory.
    pub enable_unified_memory: bool,
    /// Launch work asynchronously instead of synchronizing per call.
    pub enable_async_execution: bool,
    /// Distribute work across multiple GPUs.
    pub enable_multi_gpu: bool,
    /// Ordered list of candidate device ids; must be non-empty.
    pub preferred_gpu_ids: Vec<i32>,
    /// Adjust batch size at runtime (see `calculate_optimal_batch_size`).
    pub dynamic_batch_sizing: bool,
    /// Compress data held in the device memory pool.
    pub enable_memory_compression: bool,
    /// Maximum number of compiled kernels to cache; must be > 0.
    pub kernel_cache_size: usize,
    /// Trade-off between debuggability and speed.
    pub optimization_level: OptimizationLevel,
    /// Numeric precision used for computations.
    pub precision_mode: PrecisionMode,
}
/// GPU optimization levels.
///
/// Ordered from most debuggable (`Debug`) to most aggressive (`Extreme`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OptimizationLevel {
    /// Maximum debugging, minimal optimization.
    Debug,
    /// Good balance of performance and debugging.
    Balanced,
    /// Maximum performance, minimal debugging.
    Performance,
    /// Aggressive optimizations, may reduce precision.
    Extreme,
}
/// Precision modes for GPU computations.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PrecisionMode {
    /// Single precision (32-bit float).
    FP32,
    /// Half precision (16-bit float).
    FP16,
    /// Mixed precision (FP16 for compute, FP32 for storage).
    Mixed,
    /// 8-bit integer quantization.
    INT8,
    /// Adaptive precision based on data characteristics.
    Adaptive,
}
43impl Default for GpuConfig {
44    fn default() -> Self {
45        Self {
46            device_id: 0,
47            enable_mixed_precision: true,
48            enable_tensor_cores: true,
49            batch_size: 1024,
50            memory_pool_size: 1024 * 1024 * 1024, // 1GB
51            stream_count: 4,
52            enable_peer_access: false,
53            enable_unified_memory: false,
54            enable_async_execution: true,
55            enable_multi_gpu: false,
56            preferred_gpu_ids: vec![0],
57            dynamic_batch_sizing: true,
58            enable_memory_compression: false,
59            kernel_cache_size: 100, // Cache up to 100 compiled kernels
60            optimization_level: OptimizationLevel::Balanced,
61            precision_mode: PrecisionMode::FP32,
62        }
63    }
64}
66impl GpuConfig {
67    /// Create a high-performance configuration
68    pub fn high_performance() -> Self {
69        Self {
70            optimization_level: OptimizationLevel::Performance,
71            enable_mixed_precision: true,
72            enable_tensor_cores: true,
73            enable_async_execution: true,
74            batch_size: 2048,
75            stream_count: 8,
76            ..Default::default()
77        }
78    }
79
80    /// Create a memory-optimized configuration
81    pub fn memory_optimized() -> Self {
82        Self {
83            enable_memory_compression: true,
84            enable_unified_memory: true,
85            batch_size: 512,
86            memory_pool_size: 512 * 1024 * 1024, // 512MB
87            ..Default::default()
88        }
89    }
90
91    /// Create a debug-friendly configuration
92    pub fn debug() -> Self {
93        Self {
94            optimization_level: OptimizationLevel::Debug,
95            enable_mixed_precision: false,
96            enable_async_execution: false,
97            batch_size: 64,
98            stream_count: 1,
99            ..Default::default()
100        }
101    }
102
103    /// Validate the configuration
104    pub fn validate(&self) -> anyhow::Result<()> {
105        if self.batch_size == 0 {
106            return Err(anyhow::anyhow!("Batch size must be greater than 0"));
107        }
108        if self.stream_count == 0 {
109            return Err(anyhow::anyhow!("Stream count must be greater than 0"));
110        }
111        if self.memory_pool_size == 0 {
112            return Err(anyhow::anyhow!("Memory pool size must be greater than 0"));
113        }
114        if self.kernel_cache_size == 0 {
115            return Err(anyhow::anyhow!("Kernel cache size must be greater than 0"));
116        }
117        if self.preferred_gpu_ids.is_empty() {
118            return Err(anyhow::anyhow!(
119                "Must specify at least one preferred GPU ID"
120            ));
121        }
122        Ok(())
123    }
124
125    /// Calculate optimal batch size based on available memory
126    pub fn calculate_optimal_batch_size(
127        &self,
128        vector_dim: usize,
129        available_memory: usize,
130    ) -> usize {
131        let bytes_per_vector = vector_dim * std::mem::size_of::<f32>();
132        let max_vectors = available_memory / bytes_per_vector / 4; // Reserve 75% for safety
133        max_vectors
134            .min(self.batch_size * 4)
135            .max(self.batch_size / 4)
136    }
137}