1use crate::gpu::{GpuBackend, GpuDevice};
10use crate::traits::SimdError;
11
12#[cfg(feature = "no-std")]
13use alloc::collections::BTreeMap as HashMap;
14#[cfg(feature = "no-std")]
15use alloc::sync::Arc;
16#[cfg(not(feature = "no-std"))]
17use std::collections::HashMap;
18#[cfg(not(feature = "no-std"))]
19use std::sync::Arc;
20
21#[cfg(feature = "no-std")]
22use alloc::{format, string::ToString};
23
24#[cfg(feature = "no-std")]
25use alloc::vec::Vec;
26#[cfg(not(feature = "no-std"))]
27use std::vec::Vec;
28
29#[cfg(feature = "no-std")]
30use spin::Mutex;
31#[cfg(not(feature = "no-std"))]
32use std::sync::Mutex;
33
34#[cfg(feature = "no-std")]
35use core::slice;
36#[cfg(not(feature = "no-std"))]
37use std::slice;
38
/// Per-device bookkeeping for GPU memory blocks.
///
/// The pool tracks blocks that are currently handed out and keeps released
/// blocks in per-size-class free lists so later requests can reuse them.
pub struct GpuMemoryPool {
    /// Device this pool serves; its `backend` selects the allocation path and
    /// its `memory_mb` is reported through the pool statistics.
    device: GpuDevice,
    /// Released blocks, keyed by the size class computed from each block's size.
    free_blocks: HashMap<usize, Vec<GpuMemoryBlock>>,
    /// Blocks currently handed out, keyed by the block pointer cast to `usize`.
    allocated_blocks: HashMap<usize, GpuMemoryBlock>,
    /// Running byte total, adjusted by `allocate` and `deallocate`.
    total_allocated: usize,
    /// Largest value `total_allocated` has reached.
    peak_usage: usize,
    /// Number of calls made to `allocate`.
    allocation_count: usize,
}
48
49#[derive(Debug, Clone)]
51pub struct GpuMemoryBlock {
52 ptr: *mut u8,
53 size: usize,
54 #[allow(dead_code)] device_id: u32,
56 #[allow(dead_code)] backend: GpuBackend,
58 #[allow(dead_code)] is_unified: bool,
60}
61
// SAFETY: NOTE(review): `GpuMemoryBlock` holds a raw `*mut u8`, which is why
// these impls must be written by hand. Nothing in this file dereferences the
// pointer (it is only cast to `usize` for use as a map key), so moving or
// sharing the handle across threads does not by itself touch the memory.
// Confirm this still holds for any code elsewhere that reads through `ptr`.
unsafe impl Send for GpuMemoryBlock {}
unsafe impl Sync for GpuMemoryBlock {}
64
/// Selects which allocation path a [`GpuMemoryPool`] uses when it has no
/// reusable free block.
#[derive(Debug, Clone, Copy)]
pub enum AllocationStrategy {
    /// Request exactly the requested number of bytes from the device backend.
    Simple,
    /// Request a larger region (at least twice the request and at least 1 MiB)
    /// through the simple path.
    Pooled,
    /// Unified memory; rejected unless the device backend is CUDA.
    Unified,
    /// Pinned memory; rejected unless the device backend is CUDA.
    Pinned,
}
77
/// Tunable knobs for host/device transfer behaviour.
///
/// NOTE(review): no code in this file reads these fields; the per-field
/// descriptions below are inferred from the names and should be confirmed
/// against whatever consumes the configuration.
#[derive(Debug, Clone)]
pub struct BandwidthConfig {
    /// Presumably enables asynchronous transfers.
    pub use_async_transfers: bool,
    /// Presumably prefers pinned host memory for transfers.
    pub prefer_pinned_memory: bool,
    /// Presumably merges small transfers into larger ones.
    pub coalesce_transfers: bool,
    /// Presumably caps the number of transfer streams used at once.
    pub max_concurrent_streams: u32,
}

impl Default for BandwidthConfig {
    /// Returns a configuration with every boolean option enabled and
    /// `max_concurrent_streams` set to 4.
    fn default() -> Self {
        BandwidthConfig {
            max_concurrent_streams: 4,
            coalesce_transfers: true,
            prefer_pinned_memory: true,
            use_async_transfers: true,
        }
    }
}
97
98impl GpuMemoryPool {
99 pub fn new(device: GpuDevice) -> Self {
101 Self {
102 device,
103 free_blocks: HashMap::new(),
104 allocated_blocks: HashMap::new(),
105 total_allocated: 0,
106 peak_usage: 0,
107 allocation_count: 0,
108 }
109 }
110
111 pub fn allocate(
113 &mut self,
114 size: usize,
115 strategy: AllocationStrategy,
116 ) -> Result<GpuMemoryBlock, SimdError> {
117 self.allocation_count += 1;
118
119 if let Some(block) = self.find_free_block(size) {
121 self.allocated_blocks
122 .insert(block.ptr as usize, block.clone());
123 return Ok(block);
124 }
125
126 let block = self.allocate_new_block(size, strategy)?;
128 self.allocated_blocks
129 .insert(block.ptr as usize, block.clone());
130 self.total_allocated += size;
131 self.peak_usage = self.peak_usage.max(self.total_allocated);
132
133 Ok(block)
134 }
135
136 pub fn deallocate(&mut self, ptr: *mut u8) -> Result<(), SimdError> {
138 if let Some(block) = self.allocated_blocks.remove(&(ptr as usize)) {
139 self.total_allocated -= block.size;
140
141 let size_class = self.get_size_class(block.size);
143 self.free_blocks.entry(size_class).or_default().push(block);
144
145 Ok(())
146 } else {
147 Err(SimdError::InvalidParameter {
148 name: "ptr".to_string(),
149 value: "Invalid pointer for deallocation".to_string(),
150 })
151 }
152 }
153
154 pub fn get_stats(&self) -> MemoryStats {
156 MemoryStats {
157 total_allocated: self.total_allocated,
158 peak_usage: self.peak_usage,
159 allocation_count: self.allocation_count,
160 free_blocks_count: self.free_blocks.values().map(|v| v.len()).sum(),
161 device_memory_mb: self.device.memory_mb,
162 }
163 }
164
165 pub fn trim(&mut self) {
167 self.free_blocks.clear();
168 }
169
170 fn find_free_block(&mut self, size: usize) -> Option<GpuMemoryBlock> {
171 let size_class = self.get_size_class(size);
172
173 if let Some(blocks) = self.free_blocks.get_mut(&size_class) {
175 if let Some(block) = blocks.pop() {
176 return Some(block);
177 }
178 }
179
180 for (&class_size, blocks) in self.free_blocks.iter_mut() {
182 if class_size >= size_class && !blocks.is_empty() {
183 return blocks.pop();
184 }
185 }
186
187 None
188 }
189
190 fn allocate_new_block(
191 &self,
192 size: usize,
193 strategy: AllocationStrategy,
194 ) -> Result<GpuMemoryBlock, SimdError> {
195 match strategy {
196 AllocationStrategy::Simple => self.allocate_simple(size),
197 AllocationStrategy::Pooled => self.allocate_pooled(size),
198 AllocationStrategy::Unified => self.allocate_unified(size),
199 AllocationStrategy::Pinned => self.allocate_pinned(size),
200 }
201 }
202
203 fn allocate_simple(&self, size: usize) -> Result<GpuMemoryBlock, SimdError> {
204 match self.device.backend {
205 GpuBackend::Cuda => {
206 let _ = size;
208 Err(SimdError::UnsupportedOperation(
209 "CUDA not available".to_string(),
210 ))
211 }
212 GpuBackend::OpenCL => {
213 let _ = size;
215 Err(SimdError::UnsupportedOperation(
216 "OpenCL not available".to_string(),
217 ))
218 }
219 _ => Err(SimdError::UnsupportedOperation(
220 "Backend not supported".to_string(),
221 )),
222 }
223 }
224
225 fn allocate_pooled(&self, size: usize) -> Result<GpuMemoryBlock, SimdError> {
226 let pool_size = (size * 2).max(1024 * 1024); self.allocate_simple(pool_size)
229 }
230
231 fn allocate_unified(&self, size: usize) -> Result<GpuMemoryBlock, SimdError> {
232 if self.device.backend != GpuBackend::Cuda {
233 return Err(SimdError::UnsupportedOperation(
234 "Unified memory only available with CUDA".to_string(),
235 ));
236 }
237
238 let _ = size;
240 Err(SimdError::UnsupportedOperation(
241 "CUDA not available".to_string(),
242 ))
243 }
244
245 fn allocate_pinned(&self, size: usize) -> Result<GpuMemoryBlock, SimdError> {
246 match self.device.backend {
247 GpuBackend::Cuda => {
248 let _ = size;
250 Err(SimdError::UnsupportedOperation(
251 "CUDA not available".to_string(),
252 ))
253 }
254 _ => Err(SimdError::UnsupportedOperation(
255 "Pinned memory only available with CUDA".to_string(),
256 )),
257 }
258 }
259
260 fn get_size_class(&self, size: usize) -> usize {
261 if size == 0 {
263 return 1;
264 }
265 1 << (64 - size.leading_zeros())
266 }
267}
268
/// Snapshot of a memory pool's counters.
#[derive(Debug, Clone)]
pub struct MemoryStats {
    /// Bytes currently accounted as allocated.
    pub total_allocated: usize,
    /// Highest value `total_allocated` has reached.
    pub peak_usage: usize,
    /// Number of allocation requests made.
    pub allocation_count: usize,
    /// Number of blocks sitting in the free lists.
    pub free_blocks_count: usize,
    /// Device memory capacity in mebibytes.
    pub device_memory_mb: u64,
}

impl MemoryStats {
    /// Returns `total_allocated` as a percentage of the device capacity.
    ///
    /// Returns `0.0` when the reported capacity is zero.
    pub fn utilization_percent(&self) -> f64 {
        const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

        if self.device_memory_mb == 0 {
            return 0.0;
        }
        // Convert to floating point before scaling so that a very large
        // capacity cannot overflow `u64` during the MiB -> bytes conversion.
        let capacity_bytes = self.device_memory_mb as f64 * BYTES_PER_MB;
        (self.total_allocated as f64 / capacity_bytes) * 100.0
    }

    /// Returns `true` when utilization is strictly above `threshold` percent.
    pub fn is_high_usage(&self, threshold: f64) -> bool {
        self.utilization_percent() > threshold
    }
}
293
/// Owns one [`GpuMemoryPool`] per registered device and routes allocation
/// requests to them.
pub struct MultiGpuMemoryManager {
    /// Pools keyed by device id; each pool sits behind its own mutex.
    pools: HashMap<u32, Arc<Mutex<GpuMemoryPool>>>,
    /// Strategy passed to every pool allocation made through the manager.
    allocation_strategy: AllocationStrategy,
    /// Transfer configuration stored by `set_bandwidth_config`.
    bandwidth_config: BandwidthConfig,
}
300
301impl MultiGpuMemoryManager {
302 pub fn new() -> Self {
303 Self {
304 pools: HashMap::new(),
305 allocation_strategy: AllocationStrategy::Pooled,
306 bandwidth_config: BandwidthConfig::default(),
307 }
308 }
309
310 pub fn add_device(&mut self, device: GpuDevice) {
312 let pool = Arc::new(Mutex::new(GpuMemoryPool::new(device.clone())));
313 self.pools.insert(device.id, pool);
314 }
315
316 pub fn allocate_on_device(
318 &self,
319 device_id: u32,
320 size: usize,
321 ) -> Result<GpuMemoryBlock, SimdError> {
322 if let Some(pool) = self.pools.get(&device_id) {
323 #[cfg(not(feature = "no-std"))]
324 let mut pool = pool.lock().map_err(|_| {
325 SimdError::ExternalLibraryError("Failed to lock memory pool".to_string())
326 })?;
327 #[cfg(feature = "no-std")]
328 let mut pool = pool.lock();
329 pool.allocate(size, self.allocation_strategy)
330 } else {
331 Err(SimdError::InvalidParameter {
332 name: "device_id".to_string(),
333 value: format!("Device {} not found", device_id),
334 })
335 }
336 }
337
338 pub fn allocate_on_best_device(&self, size: usize) -> Result<(u32, GpuMemoryBlock), SimdError> {
340 let best_device = self.find_best_device_for_allocation(size)?;
341 let block = self.allocate_on_device(best_device, size)?;
342 Ok((best_device, block))
343 }
344
345 pub fn deallocate_on_device(&self, device_id: u32, ptr: *mut u8) -> Result<(), SimdError> {
347 if let Some(pool) = self.pools.get(&device_id) {
348 #[cfg(not(feature = "no-std"))]
349 let mut pool = pool.lock().map_err(|_| {
350 SimdError::ExternalLibraryError("Failed to lock memory pool".to_string())
351 })?;
352 #[cfg(feature = "no-std")]
353 let mut pool = pool.lock();
354 pool.deallocate(ptr)
355 } else {
356 Err(SimdError::InvalidParameter {
357 name: "device_id".to_string(),
358 value: format!("Device {} not found", device_id),
359 })
360 }
361 }
362
363 pub fn get_all_stats(&self) -> HashMap<u32, MemoryStats> {
365 let mut stats = HashMap::new();
366 for (&device_id, pool) in &self.pools {
367 #[cfg(not(feature = "no-std"))]
368 {
369 if let Ok(pool) = pool.lock() {
370 stats.insert(device_id, pool.get_stats());
371 }
372 }
373 #[cfg(feature = "no-std")]
374 {
375 let pool = pool.lock();
376 stats.insert(device_id, pool.get_stats());
377 }
378 }
379 stats
380 }
381
382 fn find_best_device_for_allocation(&self, size: usize) -> Result<u32, SimdError> {
384 let mut best_device = None;
385 let mut min_usage = f64::INFINITY;
386
387 for (&device_id, pool) in &self.pools {
388 #[cfg(not(feature = "no-std"))]
389 let pool_result = pool.lock();
390 #[cfg(feature = "no-std")]
391 let pool_result: Result<_, ()> = Ok(pool.lock());
392
393 if let Ok(pool) = pool_result {
394 let stats = pool.get_stats();
395 let usage = stats.utilization_percent();
396
397 let available_mb =
399 stats.device_memory_mb - (stats.total_allocated / (1024 * 1024)) as u64;
400 let required_mb = (size / (1024 * 1024)) as u64 + 1;
401
402 if available_mb >= required_mb && usage < min_usage {
403 min_usage = usage;
404 best_device = Some(device_id);
405 }
406 }
407 }
408
409 best_device.ok_or_else(|| {
410 SimdError::ExternalLibraryError("No suitable device found for allocation".to_string())
411 })
412 }
413
414 pub fn set_allocation_strategy(&mut self, strategy: AllocationStrategy) {
416 self.allocation_strategy = strategy;
417 }
418
419 pub fn set_bandwidth_config(&mut self, config: BandwidthConfig) {
421 self.bandwidth_config = config;
422 }
423}
424
425impl Default for MultiGpuMemoryManager {
426 fn default() -> Self {
427 Self::new()
428 }
429}
430
/// Typed view over a region of memory addressed by a raw pointer.
///
/// NOTE(review): the name suggests CUDA unified memory; in this file the only
/// constructor always fails, so no instance is ever created here.
#[derive(Debug)]
pub struct UnifiedMemoryBuffer<T> {
    /// Start of the region, interpreted as elements of `T`.
    ptr: *mut T,
    /// Number of `T` elements exposed through the slice accessors.
    size: usize,
    // Not read anywhere in this file.
    #[allow(dead_code)]
    device_id: u32,
}
439
440impl<T> UnifiedMemoryBuffer<T> {
441 pub fn new(size: usize, device_id: u32) -> Result<Self, SimdError> {
443 let _ = (size, device_id);
445 Err(SimdError::UnsupportedOperation(
446 "CUDA unified memory not available".to_string(),
447 ))
448 }
449
450 pub fn as_mut_slice(&mut self) -> &mut [T] {
452 unsafe { slice::from_raw_parts_mut(self.ptr, self.size) }
453 }
454
455 pub fn as_slice(&self) -> &[T] {
457 unsafe { slice::from_raw_parts(self.ptr, self.size) }
458 }
459
460 pub fn prefetch_to_gpu(&self) -> Result<(), SimdError> {
462 Err(SimdError::UnsupportedOperation(
464 "CUDA not available".to_string(),
465 ))
466 }
467
468 pub fn prefetch_to_cpu(&self) -> Result<(), SimdError> {
470 Err(SimdError::UnsupportedOperation(
472 "CUDA not available".to_string(),
473 ))
474 }
475}
476
impl<T> Drop for UnifiedMemoryBuffer<T> {
    /// Currently a no-op: nothing is released for `ptr`.
    fn drop(&mut self) {
        // Intentionally empty.
        // NOTE(review): presumably the place to free the underlying memory
        // once a real allocation path exists; confirm before relying on it.
    }
}
482
// SAFETY: NOTE(review): the raw `*mut T` field is why these impls must be
// written by hand. They are restricted to `T: Send` / `T: Sync` respectively,
// so the buffer is only sendable or shareable when its element type is.
// Confirm that the memory behind `ptr` has no thread affinity of its own.
unsafe impl<T: Send> Send for UnifiedMemoryBuffer<T> {}
unsafe impl<T: Sync> Sync for UnifiedMemoryBuffer<T> {}
485
#[cfg(all(test, not(feature = "no-std")))]
mod tests {
    use super::*;

    /// Builds a CUDA test device with the given id and memory capacity.
    fn cuda_device(id: u32, memory_mb: u64) -> GpuDevice {
        GpuDevice {
            id,
            name: format!("Device {}", id),
            backend: GpuBackend::Cuda,
            compute_units: 80,
            memory_mb,
            supports_f64: true,
            supports_f16: true,
        }
    }

    #[test]
    fn test_memory_pool_creation() {
        let pool = GpuMemoryPool::new(cuda_device(0, 8192));
        let stats = pool.get_stats();

        assert_eq!(stats.total_allocated, 0);
        assert_eq!(stats.peak_usage, 0);
        assert_eq!(stats.allocation_count, 0);
        assert_eq!(stats.free_blocks_count, 0);
        assert_eq!(stats.device_memory_mb, 8192);
    }

    #[test]
    fn test_size_class_calculation() {
        let pool = GpuMemoryPool::new(cuda_device(0, 8192));

        // A size class is the smallest power of two strictly above the size.
        assert_eq!(pool.get_size_class(0), 1);
        assert_eq!(pool.get_size_class(1), 2);
        assert_eq!(pool.get_size_class(1000), 1024);
        assert_eq!(pool.get_size_class(1024), 2048);
    }

    #[test]
    fn test_memory_stats() {
        let stats = MemoryStats {
            total_allocated: 1024 * 1024,
            peak_usage: 2 * 1024 * 1024,
            allocation_count: 5,
            free_blocks_count: 2,
            device_memory_mb: 1024,
        };

        // 1 MiB out of 1024 MiB.
        assert!((stats.utilization_percent() - 0.09765625).abs() < 0.001);
        assert!(!stats.is_high_usage(50.0));
        assert!(stats.is_high_usage(0.05));
    }

    #[test]
    fn test_multi_gpu_manager() {
        let mut manager = MultiGpuMemoryManager::new();

        manager.add_device(cuda_device(0, 8192));
        manager.add_device(GpuDevice {
            compute_units: 40,
            ..cuda_device(1, 4096)
        });

        let stats = manager.get_all_stats();
        assert_eq!(stats.len(), 2);
        assert!(stats.contains_key(&0));
        assert!(stats.contains_key(&1));
        assert_eq!(stats[&0].device_memory_mb, 8192);
        assert_eq!(stats[&1].device_memory_mb, 4096);
    }

    #[test]
    fn test_bandwidth_config() {
        let config = BandwidthConfig::default();
        assert!(config.use_async_transfers);
        assert!(config.prefer_pinned_memory);
        assert!(config.coalesce_transfers);
        assert_eq!(config.max_concurrent_streams, 4);
    }

    #[test]
    fn test_allocation_strategies() {
        let strategies = [
            AllocationStrategy::Simple,
            AllocationStrategy::Pooled,
            AllocationStrategy::Unified,
            AllocationStrategy::Pinned,
        ];

        // No backend allocation call exists, so every strategy must report an
        // unsupported operation and leave the byte accounting untouched.
        for &strategy in &strategies {
            let mut pool = GpuMemoryPool::new(cuda_device(0, 8192));
            let result = pool.allocate(1024, strategy);
            assert!(matches!(result, Err(SimdError::UnsupportedOperation(_))));
            assert_eq!(pool.get_stats().total_allocated, 0);
        }
    }

    #[test]
    fn test_deallocate_unknown_pointer() {
        let mut pool = GpuMemoryPool::new(cuda_device(0, 8192));
        let result = pool.deallocate(std::ptr::null_mut());
        assert!(matches!(result, Err(SimdError::InvalidParameter { .. })));
    }

    #[test]
    fn test_unknown_device_is_rejected() {
        let mut manager = MultiGpuMemoryManager::new();
        manager.add_device(cuda_device(0, 8192));

        assert!(matches!(
            manager.allocate_on_device(42, 1024),
            Err(SimdError::InvalidParameter { .. })
        ));
        assert!(matches!(
            manager.deallocate_on_device(42, std::ptr::null_mut()),
            Err(SimdError::InvalidParameter { .. })
        ));
    }

    #[test]
    fn test_best_device_requires_registered_devices() {
        let manager = MultiGpuMemoryManager::new();
        assert!(matches!(
            manager.allocate_on_best_device(1024),
            Err(SimdError::ExternalLibraryError(_))
        ));
    }
}