use serde::{Deserialize, Serialize};

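/// Runtime configuration for GPU execution: device selection, memory
/// budgeting, batching, streaming, and precision/optimization policy.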
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuConfig {
    pub device_id: i32,
    pub enable_mixed_precision: bool,
    pub enable_tensor_cores: bool,
    pub batch_size: usize,
    pub memory_pool_size: usize,
    pub stream_count: usize,
    pub enable_peer_access: bool,
    pub enable_unified_memory: bool,
    pub enable_async_execution: bool,
    pub enable_multi_gpu: bool,
    pub preferred_gpu_ids: Vec<i32>,
    pub dynamic_batch_sizing: bool,
    pub enable_memory_compression: bool,
    pub kernel_cache_size: usize,
    pub optimization_level: OptimizationLevel,
    pub precision_mode: PrecisionMode,
}

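/// How aggressively execution should be optimized, from debug-friendly to
/// maximum throughput.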
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum OptimizationLevel {
    Debug,
    Balanced,
    Performance,
    Extreme,
}

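/// Numeric precision used for GPU computation.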
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum PrecisionMode {
    FP32,
    FP16,
    Mixed,
    INT8,
    Adaptive,
}

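// Conservative defaults: device 0, a 1 GiB memory pool, four streams, and FP32 precision.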
impl Default for GpuConfig {
    fn default() -> Self {
        Self {
            device_id: 0,
            enable_mixed_precision: true,
            enable_tensor_cores: true,
            batch_size: 1024,
            memory_pool_size: 1024 * 1024 * 1024,
            stream_count: 4,
            enable_peer_access: false,
            enable_unified_memory: false,
            enable_async_execution: true,
            enable_multi_gpu: false,
            preferred_gpu_ids: vec![0],
            dynamic_batch_sizing: true,
            enable_memory_compression: false,
            kernel_cache_size: 100,
            optimization_level: OptimizationLevel::Balanced,
            precision_mode: PrecisionMode::FP32,
        }
    }
}

impl GpuConfig {
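    /// Preset tuned for throughput: larger batches, more streams, and mixed
    /// precision with tensor cores enabled.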
    pub fn high_performance() -> Self {
        Self {
            optimization_level: OptimizationLevel::Performance,
            enable_mixed_precision: true,
            enable_tensor_cores: true,
            enable_async_execution: true,
            batch_size: 2048,
            stream_count: 8,
            ..Default::default()
        }
    }

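    /// Preset that trades throughput for a smaller footprint: memory
    /// compression, unified memory, and a halved batch size and memory pool.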
    pub fn memory_optimized() -> Self {
        Self {
            enable_memory_compression: true,
            enable_unified_memory: true,
            batch_size: 512,
            memory_pool_size: 512 * 1024 * 1024,
            ..Default::default()
        }
    }

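    /// Preset for debugging: synchronous execution, full precision, small
    /// batches, and a single stream.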
    pub fn debug() -> Self {
        Self {
            optimization_level: OptimizationLevel::Debug,
            enable_mixed_precision: false,
            enable_async_execution: false,
            batch_size: 64,
            stream_count: 1,
            ..Default::default()
        }
    }

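    /// Checks that sizes and counts are non-zero and that at least one
    /// preferred GPU ID is specified.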
    pub fn validate(&self) -> anyhow::Result<()> {
        if self.batch_size == 0 {
            return Err(anyhow::anyhow!("Batch size must be greater than 0"));
        }
        if self.stream_count == 0 {
            return Err(anyhow::anyhow!("Stream count must be greater than 0"));
        }
        if self.memory_pool_size == 0 {
            return Err(anyhow::anyhow!("Memory pool size must be greater than 0"));
        }
        if self.kernel_cache_size == 0 {
            return Err(anyhow::anyhow!("Kernel cache size must be greater than 0"));
        }
        if self.preferred_gpu_ids.is_empty() {
            return Err(anyhow::anyhow!(
                "Must specify at least one preferred GPU ID"
            ));
        }
        Ok(())
    }

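    /// Estimates a batch size from the vector dimensionality and the memory
    /// currently available, clamped to within 4x of the configured batch size.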
    pub fn calculate_optimal_batch_size(
        &self,
        vector_dim: usize,
        available_memory: usize,
    ) -> usize {
        let bytes_per_vector = vector_dim * std::mem::size_of::<f32>();
        if bytes_per_vector == 0 {
            // Degenerate dimensionality; fall back to the configured batch size.
            return self.batch_size;
        }
        // Budget roughly a quarter of the available memory, then clamp the
        // result to within 4x of the configured batch size.
        let max_vectors = available_memory / bytes_per_vector / 4;
        max_vectors
            .min(self.batch_size * 4)
            .max(self.batch_size / 4)
    }
}
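
// A minimal usage sketch; the expected values assume the defaults defined
// above (batch_size = 1024).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_config_is_valid() {
        // The default configuration should pass its own validation rules.
        assert!(GpuConfig::default().validate().is_ok());
    }

    #[test]
    fn optimal_batch_size_is_clamped() {
        let config = GpuConfig::default();
        // 384-dimensional f32 vectors with 1 GiB available: the raw estimate
        // exceeds 4 * batch_size, so the result is clamped to 4 * 1024 = 4096.
        let batch = config.calculate_optimal_batch_size(384, 1024 * 1024 * 1024);
        assert_eq!(batch, config.batch_size * 4);
    }
}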