/// Tunable settings for GPU execution: device selection, precision,
/// batching, memory management and multi-GPU behavior.
///
/// Build one via `Default` or the presets (`high_performance`,
/// `memory_optimized`, `debug`) and check it with `validate()`.
#[derive(Debug, Clone)]
pub struct GpuConfig {
    /// Ordinal of the primary GPU to target (presumably a CUDA-style
    /// device index — confirm against the backend).
    pub device_id: i32,
    /// Allow lower-precision math where the backend deems it safe
    /// (see also `precision_mode`).
    pub enable_mixed_precision: bool,
    /// Opt into tensor-core code paths where available.
    pub enable_tensor_cores: bool,
    /// Number of items processed per GPU batch; must be > 0 (`validate()`).
    pub batch_size: usize,
    /// Size of the preallocated memory pool; the default of 1024^3
    /// suggests bytes (1 GiB) — confirm.
    pub memory_pool_size: usize,
    /// Number of concurrent execution streams; must be > 0 (`validate()`).
    pub stream_count: usize,
    /// Enable direct GPU-to-GPU (peer) memory access.
    pub enable_peer_access: bool,
    /// Use unified (host/device-shared) memory.
    pub enable_unified_memory: bool,
    /// Submit work asynchronously instead of synchronizing per call.
    pub enable_async_execution: bool,
    /// Spread work across multiple GPUs.
    pub enable_multi_gpu: bool,
    /// Devices to prefer, in order; `validate()` requires at least one entry.
    pub preferred_gpu_ids: Vec<i32>,
    /// Let the runtime adjust the batch size at run time
    /// (see `calculate_optimal_batch_size`).
    pub dynamic_batch_sizing: bool,
    /// Compress pooled memory to shrink the footprint.
    pub enable_memory_compression: bool,
    /// Capacity of the kernel cache; must be > 0 (`validate()`).
    /// Units are not stated here — default 100 looks like an entry
    /// count. TODO confirm.
    pub kernel_cache_size: usize,
    /// How aggressively the pipeline is optimized (`Debug` … `Extreme`).
    pub optimization_level: OptimizationLevel,
    /// Numeric precision policy for kernels.
    pub precision_mode: PrecisionMode,
}
23
/// How aggressively the GPU pipeline is optimized.
///
/// Used by the `GpuConfig` presets: `debug()` selects `Debug`,
/// `high_performance()` selects `Performance`, and the default is
/// `Balanced`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum OptimizationLevel {
    /// Minimal optimization; favors traceability.
    Debug,
    /// Default trade-off (see `GpuConfig::default`).
    Balanced,
    /// Throughput-oriented settings.
    Performance,
    /// Most aggressive settings.
    Extreme,
}
32
/// Numeric precision policy for GPU kernels.
///
/// `FP32` is the `GpuConfig` default; the other modes trade precision
/// for speed or let the runtime choose (`Adaptive`).
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PrecisionMode {
    /// Full 32-bit floating point (default).
    FP32,
    /// 16-bit floating point.
    FP16,
    /// Mixed precision (see `GpuConfig::enable_mixed_precision`).
    Mixed,
    /// 8-bit integer.
    INT8,
    /// Precision chosen at run time — presumably per-kernel; confirm.
    Adaptive,
}
42
43impl Default for GpuConfig {
44 fn default() -> Self {
45 Self {
46 device_id: 0,
47 enable_mixed_precision: true,
48 enable_tensor_cores: true,
49 batch_size: 1024,
50 memory_pool_size: 1024 * 1024 * 1024, stream_count: 4,
52 enable_peer_access: false,
53 enable_unified_memory: false,
54 enable_async_execution: true,
55 enable_multi_gpu: false,
56 preferred_gpu_ids: vec![0],
57 dynamic_batch_sizing: true,
58 enable_memory_compression: false,
59 kernel_cache_size: 100, optimization_level: OptimizationLevel::Balanced,
61 precision_mode: PrecisionMode::FP32,
62 }
63 }
64}
65
66impl GpuConfig {
67 pub fn high_performance() -> Self {
69 Self {
70 optimization_level: OptimizationLevel::Performance,
71 enable_mixed_precision: true,
72 enable_tensor_cores: true,
73 enable_async_execution: true,
74 batch_size: 2048,
75 stream_count: 8,
76 ..Default::default()
77 }
78 }
79
80 pub fn memory_optimized() -> Self {
82 Self {
83 enable_memory_compression: true,
84 enable_unified_memory: true,
85 batch_size: 512,
86 memory_pool_size: 512 * 1024 * 1024, ..Default::default()
88 }
89 }
90
91 pub fn debug() -> Self {
93 Self {
94 optimization_level: OptimizationLevel::Debug,
95 enable_mixed_precision: false,
96 enable_async_execution: false,
97 batch_size: 64,
98 stream_count: 1,
99 ..Default::default()
100 }
101 }
102
103 pub fn validate(&self) -> anyhow::Result<()> {
105 if self.batch_size == 0 {
106 return Err(anyhow::anyhow!("Batch size must be greater than 0"));
107 }
108 if self.stream_count == 0 {
109 return Err(anyhow::anyhow!("Stream count must be greater than 0"));
110 }
111 if self.memory_pool_size == 0 {
112 return Err(anyhow::anyhow!("Memory pool size must be greater than 0"));
113 }
114 if self.kernel_cache_size == 0 {
115 return Err(anyhow::anyhow!("Kernel cache size must be greater than 0"));
116 }
117 if self.preferred_gpu_ids.is_empty() {
118 return Err(anyhow::anyhow!(
119 "Must specify at least one preferred GPU ID"
120 ));
121 }
122 Ok(())
123 }
124
125 pub fn calculate_optimal_batch_size(
127 &self,
128 vector_dim: usize,
129 available_memory: usize,
130 ) -> usize {
131 let bytes_per_vector = vector_dim * std::mem::size_of::<f32>();
132 let max_vectors = available_memory / bytes_per_vector / 4; max_vectors
134 .min(self.batch_size * 4)
135 .max(self.batch_size / 4)
136 }
137}