use super::{
    cpu::CpuInfo, gpu::GpuInfo, memory::MemoryInfo, network::NetworkInfo, storage::StorageInfo,
};
use crate::error::CoreResult;

/// Optimization parameters tuned to the detected hardware.
#[derive(Debug, Clone)]
pub struct OptimizationParams {
    /// Number of worker threads to use
    pub thread_count: usize,
    /// Chunk size in bytes for data processing
    pub chunk_size: usize,
    /// Whether SIMD acceleration should be used
    pub enable_simd: bool,
    /// Whether GPU acceleration should be used
    pub enable_gpu: bool,
    /// Whether memory prefetching should be used
    pub enable_prefetch: bool,
    /// Whether to use NUMA-aware scheduling
    pub numa_aware: bool,
    /// Cache tuning parameters
    pub cache_params: CacheParams,
    /// I/O tuning parameters
    pub io_params: IoParams,
    /// GPU tuning parameters, if a compute-capable GPU is present
    pub gpu_params: Option<GpuParams>,
}

impl Default for OptimizationParams {
    fn default() -> Self {
        Self {
            thread_count: std::thread::available_parallelism()
                .map(|n| n.get())
                .unwrap_or(4),
            chunk_size: 64 * 1024, // 64 KiB
            enable_simd: false,
            enable_gpu: false,
            enable_prefetch: true,
            numa_aware: false,
            cache_params: CacheParams::default(),
            io_params: IoParams::default(),
            gpu_params: None,
        }
    }
}

impl OptimizationParams {
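    /// Generates optimization parameters from the detected hardware resources.
    ///
    /// A minimal usage sketch (hypothetical caller; the `Default` resource
    /// values mirror the ones exercised in the tests below):
    ///
    /// ```ignore
    /// let cpu = CpuInfo::default();
    /// let memory = MemoryInfo::default();
    /// let network = NetworkInfo::default();
    /// let storage = StorageInfo::default();
    /// let params = OptimizationParams::generate(&cpu, &memory, None, &network, &storage)?;
    /// assert!(params.thread_count > 0);
    /// ```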
    pub fn generate(
        cpu: &CpuInfo,
        memory: &MemoryInfo,
        gpu: Option<&GpuInfo>,
        network: &NetworkInfo,
        storage: &StorageInfo,
    ) -> CoreResult<Self> {
        let thread_count = Self::calculate_optimal_thread_count(cpu, memory);
        let chunk_size = Self::calculate_optimal_chunk_size(cpu, memory, storage);
        let enable_simd = Self::should_enable_simd(cpu);
        let enable_gpu = Self::should_enable_gpu(gpu);
        let enable_prefetch = Self::should_enable_prefetch(memory, storage);
        let numa_aware = memory.numa_nodes > 1;

        let cache_params = CacheParams::from_cpu(cpu);
        let io_params = IoParams::from_resources(network, storage);
        let gpu_params = gpu.map(GpuParams::from_gpu);

        Ok(Self {
            thread_count,
            chunk_size,
            enable_simd,
            enable_gpu,
            enable_prefetch,
            numa_aware,
            cache_params,
            io_params,
            gpu_params,
        })
    }

    /// Picks a thread count from the core topology, discounted under memory pressure.
    fn calculate_optimal_thread_count(cpu: &CpuInfo, memory: &MemoryInfo) -> usize {
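        // Worked example (hypothetical CPU, mirroring the test below): 8 physical
        // and 16 logical cores with no memory pressure give 8 + (16 - 8) / 2 = 12
        // threads, which already lies within the [1, 16] bounds applied at the end.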
        // Start from the physical core count.
        let base_threads = cpu.physical_cores;

        // Hyperthreading adds roughly half a core of throughput per extra
        // logical core, so count the surplus at 50%.
        let ht_benefit = if cpu.logical_cores > cpu.physical_cores {
            (cpu.logical_cores - cpu.physical_cores) / 2
        } else {
            0
        };

        // Back off when memory is under pressure.
        let memory_factor = if memory.is_under_pressure() {
            0.75
        } else {
            1.0
        };

        let optimal = ((base_threads + ht_benefit) as f64 * memory_factor) as usize;
        // At least one thread, at most all logical cores.
        optimal.max(1).min(cpu.logical_cores)
    }

    /// Picks a chunk size balancing CPU cache, memory, and storage characteristics.
    fn calculate_optimal_chunk_size(
        cpu: &CpuInfo,
        memory: &MemoryInfo,
        storage: &StorageInfo,
    ) -> usize {
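        // Worked example (hypothetical sizes): an 8 MiB L3 gives a 2 MiB cache
        // estimate; with a 1 MiB memory hint and a 256 KiB storage I/O size the
        // geometric mean is cbrt(2 MiB * 1 MiB * 256 KiB) ~ 0.8 MiB, well inside
        // the 4 KiB..64 MiB clamp below.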
        // A quarter of the L3 cache keeps chunks cache-resident.
        let cachebased = cpu.cache_l3_kb * 1024 / 4;

        // The memory subsystem's preferred chunk size.
        let memorybased = memory.optimal_chunk_size();

        // The storage device's preferred I/O size.
        let storagebased = storage.optimal_io_size;

        // Blend the three estimates with a geometric mean.
        let geometric_mean = ((cachebased as f64 * memorybased as f64 * storagebased as f64)
            .powf(1.0 / 3.0)) as usize;

        // Keep the result between 4 KiB and 64 MiB.
        geometric_mean.clamp(4 * 1024, 64 * 1024 * 1024)
    }

    /// SIMD pays off when the CPU offers a modern vector instruction set.
    fn should_enable_simd(cpu: &CpuInfo) -> bool {
        cpu.simd_capabilities.sse4_2 || cpu.simd_capabilities.avx2 || cpu.simd_capabilities.neon
    }

    /// GPU offload is only worthwhile on a compute-capable device.
    fn should_enable_gpu(gpuinfo: Option<&GpuInfo>) -> bool {
        gpuinfo.map(|g| g.is_compute_capable()).unwrap_or(false)
    }

    /// Prefetching helps only when memory is not under pressure and the
    /// storage stack can overlap I/O with computation.
    fn should_enable_prefetch(memory: &MemoryInfo, storage: &StorageInfo) -> bool {
        !memory.is_under_pressure() && storage.supports_async_io()
    }

    /// Returns a multiplier describing how work scales with problem size.
    pub fn get_scaling_factor(problemsize: usize) -> f64 {
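        // Worked example: a 4 MiB problem is 4x the 1 MiB baseline, so the
        // factor is sqrt(4) = 2.0; anything at or below 1 MiB returns 1.0.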
        let base_size = 1024 * 1024; // 1 MiB baseline

        if problemsize <= base_size {
            1.0
        } else {
            // Grow sub-linearly with the square root of the size ratio.
            let ratio = problemsize as f64 / base_size as f64;
            ratio.sqrt()
        }
    }

    /// Convenience method forwarding to [`Self::get_scaling_factor`].
    pub fn scaling_factor(&self, problemsize: usize) -> f64 {
        Self::get_scaling_factor(problemsize)
    }

    /// Adjusts the parameters in place for a particular workload profile.
    pub fn adjust_for_workload(&mut self, workload: WorkloadType) {
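        // Example: on a hypothetical 16-thread machine, CpuIntensive raises
        // thread_count to 16 and grows chunks to at least 1 MiB
        // (see test_workload_adjustment below).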
        match workload {
            WorkloadType::CpuIntensive => {
                // Use every available core and large chunks to cut scheduling overhead.
                self.thread_count = self.thread_count.max(
                    std::thread::available_parallelism()
                        .map(|n| n.get())
                        .unwrap_or(4),
                );
                self.chunk_size = self.chunk_size.max(1024 * 1024); // at least 1 MiB
            }
            WorkloadType::MemoryIntensive => {
                // Prefetch aggressively but keep chunks small to limit footprint.
                self.enable_prefetch = true;
                self.chunk_size = self.chunk_size.min(256 * 1024); // at most 256 KiB
            }
            WorkloadType::IoIntensive => {
                // Extra threads hide I/O latency; match chunks to the I/O buffer.
                self.thread_count = (self.thread_count * 2).min(16);
                self.chunk_size = self.io_params.optimal_buffersize;
            }
            WorkloadType::GpuIntensive => {
                if self.enable_gpu {
                    // Leave most CPU threads free to feed the GPU.
                    self.thread_count = self.thread_count.min(4);
                }
            }
        }
    }
}

#[derive(Debug, Clone)]
/// Cache-related tuning parameters.
pub struct CacheParams {
    /// Cache line size in bytes
    pub cache_line_size: usize,
    /// Preferred data alignment in bytes
    pub alignment: usize,
    /// Prefetch distance in bytes
    pub prefetch_distance: usize,
    /// Tile size in bytes for cache-blocked algorithms
    pub tile_size: usize,
}

impl Default for CacheParams {
    fn default() -> Self {
        Self {
            cache_line_size: 64,
            alignment: 64,
            prefetch_distance: 64,
            tile_size: 64,
        }
    }
}

impl CacheParams {
    /// Derives cache parameters from the CPU's cache hierarchy.
    pub fn from_cpu(cpu: &CpuInfo) -> Self {
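        // Worked example (hypothetical CPU, mirroring test_cache_params): a
        // 32 KiB L1 cache yields prefetch_distance = 32768 / 16 = 2048, clamped
        // to 1024, and tile_size = 32768 / 8 = 4096, at the top of its clamp.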
        // 64 bytes is the cache line size on virtually all current x86 and ARM parts.
        let cache_line_size = 64;
        let alignment = cache_line_size;

        // Prefetch a sixteenth of L1 ahead, within sane bounds.
        let prefetch_distance = (cpu.cache_l1_kb * 1024 / 16).clamp(64, 1024);

        // Tile to an eighth of L1 so working sets stay resident.
        let tile_size = (cpu.cache_l1_kb * 1024 / 8).clamp(64, 4096);

        Self {
            cache_line_size,
            alignment,
            prefetch_distance,
            tile_size,
        }
    }
}

#[derive(Debug, Clone)]
/// I/O-related tuning parameters.
pub struct IoParams {
    /// Preferred I/O buffer size in bytes
    pub optimal_buffersize: usize,
    /// Number of I/O operations to keep in flight
    pub concurrent_operations: usize,
    /// Whether asynchronous I/O should be used
    pub enable_async_io: bool,
    /// Whether the OS I/O cache should be used
    pub enable_io_cache: bool,
}

impl Default for IoParams {
    fn default() -> Self {
        Self {
            optimal_buffersize: 64 * 1024, // 64 KiB
            concurrent_operations: 4,
            enable_async_io: true,
            enable_io_cache: true,
        }
    }
}

impl IoParams {
    /// Derives I/O parameters from network and storage characteristics.
    pub fn from_network(network: &NetworkInfo, storage: &StorageInfo) -> Self {
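        // Example (hypothetical device): a 128 KiB optimal I/O size and a
        // 1500-byte MTU give a 128 KiB buffer, since the larger of the two wins.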
        let optimal_buffersize = storage.optimal_io_size.max(network.mtu);
        let concurrent_operations = storage.queue_depth.min(16);
        let enable_async_io = storage.supports_async_io();
        // Rely on the OS cache for spinning disks, or for SSDs over 512 GiB.
        let enable_io_cache = !storage.is_ssd() || storage.capacity > 512 * 1024 * 1024 * 1024;

        Self {
            optimal_buffersize,
            concurrent_operations,
            enable_async_io,
            enable_io_cache,
        }
    }

    /// Alias for [`Self::from_network`], named for the full resource set it consumes.
    pub fn from_resources(network: &NetworkInfo, storage: &StorageInfo) -> Self {
        Self::from_network(network, storage)
    }
}

#[derive(Debug, Clone)]
/// GPU-related tuning parameters.
pub struct GpuParams {
    /// Threads per workgroup
    pub workgroup_size: usize,
    /// Number of workgroups to launch
    pub workgroup_count: usize,
    /// Shared (local) memory per workgroup, in bytes
    pub shared_memory_size: usize,
    /// Whether to rely on unified host/device memory
    pub use_unified_memory: bool,
    /// Strategy for host/device data transfers
    pub transfer_strategy: GpuTransferStrategy,
}

impl GpuParams {
    /// Derives GPU parameters from the detected device.
    pub fn from_gpu(gpu: &GpuInfo) -> Self {
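        // Example (mirroring test_gpu_params): 2048 compute units launch
        // 2048 * 4 = 8192 workgroups, comfortably under the 65535 cap.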
        let workgroup_size = gpu.optimal_workgroup_size();
        let workgroup_count = (gpu.compute_units * 4).min(65535); // cap at common API limit
        let shared_memory_size = 16 * 1024; // 16 KiB, widely supported
        let use_unified_memory = gpu.features.unified_memory;

        // Very high-bandwidth boards get dedicated handling; unified-memory
        // devices skip explicit copies entirely.
        let transfer_strategy = if gpu.memorybandwidth_gbps > 500.0 {
            GpuTransferStrategy::HighBandwidth
        } else if use_unified_memory {
            GpuTransferStrategy::Unified
        } else {
            GpuTransferStrategy::Standard
        };

        Self {
            workgroup_size,
            workgroup_count,
            shared_memory_size,
            use_unified_memory,
            transfer_strategy,
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuTransferStrategy {
    /// Explicit host/device copies
    Standard,
    /// Optimized path for high-bandwidth devices
    HighBandwidth,
    /// Unified host/device address space
    Unified,
    /// Zero-copy access to host memory
    ZeroCopy,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WorkloadType {
    /// Compute-bound workloads
    CpuIntensive,
    /// Memory-bandwidth-bound workloads
    MemoryIntensive,
    /// I/O-bound workloads
    IoIntensive,
    /// GPU-offloaded workloads
    GpuIntensive,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_optimization_params_generation() {
        let cpu = CpuInfo::default();
        let memory = MemoryInfo::default();
        let gpu = Some(GpuInfo::default());
        let network = NetworkInfo::default();
        let storage = StorageInfo::default();

        let params = OptimizationParams::generate(&cpu, &memory, gpu.as_ref(), &network, &storage);
        assert!(params.is_ok());

        let params = params.unwrap();
        assert!(params.thread_count > 0);
        assert!(params.chunk_size > 0);
    }

    #[test]
    fn test_thread_count_calculation() {
        let cpu = CpuInfo {
            physical_cores: 8,
            logical_cores: 16,
            ..Default::default()
        };
        let memory = MemoryInfo::default();

        let thread_count = OptimizationParams::calculate_optimal_thread_count(&cpu, &memory);
        assert!(thread_count >= 8);
        assert!(thread_count <= 16);
    }

    #[test]
    fn test_chunk_size_calculation() {
        let cpu = CpuInfo {
            cache_l3_kb: 8192, // 8 MiB L3
            ..Default::default()
        };
        let memory = MemoryInfo::default();
        let storage = StorageInfo::default();

        let chunk_size = OptimizationParams::calculate_optimal_chunk_size(&cpu, &memory, &storage);
        assert!(chunk_size >= 4 * 1024);
        assert!(chunk_size <= 64 * 1024 * 1024);
    }

    #[test]
    fn test_workload_adjustment() {
        let mut params = OptimizationParams::default();
        let original_thread_count = params.thread_count;

        params.adjust_for_workload(WorkloadType::CpuIntensive);
        assert!(params.thread_count >= original_thread_count);

        params.adjust_for_workload(WorkloadType::MemoryIntensive);
        assert!(params.enable_prefetch);
    }

    #[test]
    fn test_cache_params() {
        let cpu = CpuInfo {
            cache_l1_kb: 32,
            ..Default::default()
        };

        let cache_params = CacheParams::from_cpu(&cpu);
        assert_eq!(cache_params.cache_line_size, 64);
        assert!(cache_params.tile_size > 0);
    }

    #[test]
    fn test_gpu_params() {
        let gpu = GpuInfo {
            vendor: super::super::gpu::GpuVendor::Nvidia,
            compute_units: 2048,
            features: super::super::gpu::GpuFeatures {
                unified_memory: true,
                ..Default::default()
            },
            ..Default::default()
        };

        let gpu_params = GpuParams::from_gpu(&gpu);
        assert_eq!(gpu_params.workgroup_size, 256); // expected default workgroup size
        assert!(gpu_params.use_unified_memory);
        assert_eq!(gpu_params.transfer_strategy, GpuTransferStrategy::Unified);
    }

    #[test]
    fn test_scaling_factor() {
        let params = OptimizationParams::default();

        assert_eq!(params.scaling_factor(1024), 1.0); // at or below the 1 MiB baseline
        assert!(params.scaling_factor(1024 * 1024 * 4) > 1.0); // larger problems scale up
    }
}