// oxirs_vec/gpu/config.rs

//! GPU configuration structures and enums

use serde::{Deserialize, Serialize};

5/// Configuration for GPU operations
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct GpuConfig {
8    pub device_id: i32,
9    pub enable_mixed_precision: bool,
10    pub enable_tensor_cores: bool,
11    pub batch_size: usize,
12    pub memory_pool_size: usize,
13    pub stream_count: usize,
14    pub enable_peer_access: bool,
15    pub enable_unified_memory: bool,
16    pub enable_async_execution: bool,
17    pub enable_multi_gpu: bool,
18    pub preferred_gpu_ids: Vec<i32>,
19    pub dynamic_batch_sizing: bool,
20    pub enable_memory_compression: bool,
21    pub kernel_cache_size: usize,
22    pub optimization_level: OptimizationLevel,
23    pub precision_mode: PrecisionMode,
24}
25
26/// GPU optimization levels
27#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
28pub enum OptimizationLevel {
29    Debug,       // Maximum debugging, minimal optimization
30    Balanced,    // Good balance of performance and debugging
31    Performance, // Maximum performance, minimal debugging
32    Extreme,     // Aggressive optimizations, may reduce precision
33}
34
35/// Precision modes for GPU computations
36#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
37pub enum PrecisionMode {
38    FP32,     // Single precision
39    FP16,     // Half precision
40    Mixed,    // Mixed precision (FP16 for compute, FP32 for storage)
41    INT8,     // 8-bit integer quantization
42    Adaptive, // Adaptive precision based on data characteristics
43}
44
45impl Default for GpuConfig {
46    fn default() -> Self {
47        Self {
48            device_id: 0,
49            enable_mixed_precision: true,
50            enable_tensor_cores: true,
51            batch_size: 1024,
52            memory_pool_size: 1024 * 1024 * 1024, // 1GB
53            stream_count: 4,
54            enable_peer_access: false,
55            enable_unified_memory: false,
56            enable_async_execution: true,
57            enable_multi_gpu: false,
58            preferred_gpu_ids: vec![0],
59            dynamic_batch_sizing: true,
60            enable_memory_compression: false,
61            kernel_cache_size: 100, // Cache up to 100 compiled kernels
62            optimization_level: OptimizationLevel::Balanced,
63            precision_mode: PrecisionMode::FP32,
64        }
65    }
66}
67
68impl GpuConfig {
69    /// Create a high-performance configuration
70    pub fn high_performance() -> Self {
71        Self {
72            optimization_level: OptimizationLevel::Performance,
73            enable_mixed_precision: true,
74            enable_tensor_cores: true,
75            enable_async_execution: true,
76            batch_size: 2048,
77            stream_count: 8,
78            ..Default::default()
79        }
80    }
81
82    /// Create a memory-optimized configuration
83    pub fn memory_optimized() -> Self {
84        Self {
85            enable_memory_compression: true,
86            enable_unified_memory: true,
87            batch_size: 512,
88            memory_pool_size: 512 * 1024 * 1024, // 512MB
89            ..Default::default()
90        }
91    }
92
93    /// Create a debug-friendly configuration
94    pub fn debug() -> Self {
95        Self {
96            optimization_level: OptimizationLevel::Debug,
97            enable_mixed_precision: false,
98            enable_async_execution: false,
99            batch_size: 64,
100            stream_count: 1,
101            ..Default::default()
102        }
103    }
104
105    /// Validate the configuration
106    pub fn validate(&self) -> anyhow::Result<()> {
107        if self.batch_size == 0 {
108            return Err(anyhow::anyhow!("Batch size must be greater than 0"));
109        }
110        if self.stream_count == 0 {
111            return Err(anyhow::anyhow!("Stream count must be greater than 0"));
112        }
113        if self.memory_pool_size == 0 {
114            return Err(anyhow::anyhow!("Memory pool size must be greater than 0"));
115        }
116        if self.kernel_cache_size == 0 {
117            return Err(anyhow::anyhow!("Kernel cache size must be greater than 0"));
118        }
119        if self.preferred_gpu_ids.is_empty() {
120            return Err(anyhow::anyhow!(
121                "Must specify at least one preferred GPU ID"
122            ));
123        }
124        Ok(())
125    }
126
127    /// Calculate optimal batch size based on available memory
128    pub fn calculate_optimal_batch_size(
129        &self,
130        vector_dim: usize,
131        available_memory: usize,
132    ) -> usize {
133        let bytes_per_vector = vector_dim * std::mem::size_of::<f32>();
134        let max_vectors = available_memory / bytes_per_vector / 4; // Reserve 75% for safety
135        max_vectors
136            .min(self.batch_size * 4)
137            .max(self.batch_size / 4)
138    }
139}