#![allow(clippy::too_many_arguments)]
#![allow(dead_code)]
use super::runtime::{CudaRuntime, OpenClRuntime};
use crate::error::Result;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Instant;
/// Bundle of per-device CUDA state: raw handles, a shared memory pool,
/// device properties and a shared runtime handle.
///
/// Handles are stored as plain `usize` values; this file does not show how
/// they are created or released.
/// NOTE(review): confirm handle ownership/lifetime against the runtime module.
#[derive(Debug)]
pub struct CudaContext {
/// Device identifier.
/// NOTE(review): the leading underscore suggests it may be unused — confirm.
pub _device_id: i32,
/// Context handle stored as an integer.
pub context_handle: usize,
/// Stream handles stored as integers.
pub streams: Vec<usize>,
/// Memory pool, shared and lock-protected via `Arc<Mutex<..>>`.
pub memory_pool: Arc<Mutex<CudaMemoryPool>>,
/// Properties recorded for the device.
pub device_props: CudaDeviceProperties,
/// Shared, lock-protected `CudaRuntime` (imported from `super::runtime`).
pub runtime: Arc<Mutex<CudaRuntime>>,
}
/// Plain-data description of a CUDA device.
///
/// NOTE(review): nothing in this file populates or reads these fields. The
/// per-field notes below are inferred from the field names (which resemble
/// CUDA's `cudaDeviceProp`); units and exact meanings must be confirmed
/// against whatever code fills this struct.
#[derive(Debug, Clone)]
pub struct CudaDeviceProperties {
/// Device name.
pub name: String,
/// Presumably the compute-capability major version — confirm.
pub major: i32,
/// Presumably the compute-capability minor version — confirm.
pub minor: i32,
/// Total global memory (presumably bytes — confirm).
pub total_global_mem: usize,
/// Shared memory per block (presumably bytes — confirm).
pub shared_mem_per_block: usize,
/// Maximum threads per block.
pub max_threads_per_block: i32,
/// Maximum block extent, one entry per dimension (three dimensions).
pub max_threads_dim: [i32; 3],
/// Maximum grid extent, one entry per dimension (three dimensions).
pub max_grid_size: [i32; 3],
/// Warp size (presumably in threads — confirm).
pub warp_size: i32,
/// Memory pitch (units not established here).
pub memory_pitch: usize,
/// Maximum threads per multiprocessor.
pub max_threads_per_multiprocessor: i32,
/// Number of multiprocessors.
pub multiprocessor_count: i32,
/// Core clock rate (units not established here).
pub clock_rate: i32,
/// Memory clock rate (units not established here).
pub memory_clock_rate: i32,
/// Memory bus width (presumably bits — confirm).
pub memory_bus_width: i32,
/// L2 cache size (presumably bytes — confirm).
pub l2_cache_size: i32,
/// Texture alignment requirement (units not established here).
pub texture_alignment: usize,
/// Whether concurrent kernels are supported — presumably; confirm.
pub concurrent_kernels: bool,
/// Compute mode code; the meaning of each value is not defined in this file.
pub compute_mode: i32,
/// Whether unified addressing is available — presumably; confirm.
pub unified_addressing: bool,
}
/// Bookkeeping for a size-bucketed pool of memory blocks.
///
/// The pool only tracks `CudaMemoryBlock` records; `allocate` computes block
/// addresses arithmetically and no device call is made in this file.
#[derive(Debug)]
pub struct CudaMemoryPool {
/// Blocks returned through `free`, grouped by their exact size so that
/// `allocate` can reuse a block for a request of the same size.
pub free_blocks: HashMap<usize, Vec<CudaMemoryBlock>>,
/// Blocks currently handed out, keyed by `CudaMemoryBlock::ptr`.
pub allocated_blocks: HashMap<usize, CudaMemoryBlock>,
/// Running byte count maintained by `allocate` and `free`.
pub total_allocated: usize,
/// Upper bound that `allocate` checks `total_allocated + size` against.
pub memory_limit: usize,
}
/// Record describing one block managed by `CudaMemoryPool`.
#[derive(Debug, Clone)]
pub struct CudaMemoryBlock {
/// Address-like identifier; `CudaMemoryPool::allocate` computes it
/// arithmetically and uses it as the key in `allocated_blocks`.
pub ptr: usize,
/// Size requested from `CudaMemoryPool::allocate`; also the free-list bucket key.
pub size: usize,
/// Timestamp set by `CudaMemoryPool::allocate` via `Instant::now()`.
pub allocated_at: Instant,
}
/// Bundle of per-device OpenCL state: raw handles, a shared program cache,
/// device information and a shared runtime handle.
///
/// Handles are stored as plain `usize` values; this file does not show how
/// they are created or released.
/// NOTE(review): confirm handle ownership/lifetime against the runtime module.
#[derive(Debug)]
pub struct OpenClContext {
/// Platform identifier stored as an integer.
pub platform_id: usize,
/// Device identifier.
/// NOTE(review): the leading underscore suggests it may be unused — confirm.
pub _device_id: usize,
/// Context handle stored as an integer.
pub context_handle: usize,
/// Command-queue handle stored as an integer.
pub command_queue: usize,
/// Shared, lock-protected map from a string key to an integer value.
/// NOTE(review): presumably program name/source -> program handle — confirm.
pub program_cache: Arc<Mutex<HashMap<String, usize>>>,
/// Information recorded for the device.
pub device_info: OpenClDeviceInfo,
/// Shared, lock-protected `OpenClRuntime` (imported from `super::runtime`).
pub runtime: Arc<Mutex<OpenClRuntime>>,
}
/// Plain-data description of an OpenCL device.
///
/// NOTE(review): nothing in this file populates or reads these fields. The
/// per-field notes below are inferred from the field names (which resemble
/// `clGetDeviceInfo` queries); units and exact meanings must be confirmed
/// against whatever code fills this struct.
#[derive(Debug, Clone)]
pub struct OpenClDeviceInfo {
/// Device name.
pub name: String,
/// Vendor string.
pub vendor: String,
/// Version string.
pub version: String,
/// Profile string.
pub profile: String,
/// Global memory size (presumably bytes — confirm).
pub global_mem_size: usize,
/// Local memory size (presumably bytes — confirm).
pub local_mem_size: usize,
/// Maximum work-group size.
pub max_work_group_size: usize,
/// Maximum number of work-item dimensions.
pub max_work_item_dimensions: u32,
/// Maximum work-item sizes.
/// NOTE(review): presumably one entry per dimension — confirm.
pub max_work_item_sizes: Vec<usize>,
/// Maximum number of compute units.
pub max_compute_units: u32,
/// Maximum clock frequency (units not established here).
pub max_clock_frequency: u32,
/// Address width (presumably bits — confirm).
pub address_bits: u32,
/// Whether images are supported — presumably; confirm.
pub image_support: bool,
/// Preferred vector width for `float`.
pub preferred_vector_width_float: u32,
/// Preferred vector width for `double`.
pub preferred_vector_width_double: u32,
}
impl CudaMemoryPool {
    /// Base value of the synthetic address range handed out by the pool.
    const BASE_ADDRESS: usize = 0x1000_0000;

    /// Creates an empty pool that refuses to hold more than `memory_limit`
    /// bytes in live (allocated) blocks at any one time.
    pub fn new(memory_limit: usize) -> Self {
        Self {
            free_blocks: HashMap::new(),
            allocated_blocks: HashMap::new(),
            total_allocated: 0,
            memory_limit,
        }
    }

    /// Hands out a block of exactly `size` bytes.
    ///
    /// A previously freed block of the same size is reused when one exists;
    /// otherwise a new block is placed past the end of every block the pool
    /// knows about.
    ///
    /// Returns `None` when the request would push `total_allocated` above
    /// `memory_limit`, or when the byte count or the synthetic address would
    /// overflow `usize`.
    pub fn allocate(&mut self, size: usize) -> Option<CudaMemoryBlock> {
        // checked_add: an oversized request must be rejected, not wrap around
        // and slip under the limit.
        let new_total = self.total_allocated.checked_add(size)?;
        if new_total > self.memory_limit {
            return None;
        }

        let recycled = self
            .free_blocks
            .get_mut(&size)
            .and_then(|bucket| bucket.pop());

        let block = match recycled {
            Some(mut reused) => {
                // The block is being handed out again, so refresh its timestamp.
                reused.allocated_at = Instant::now();
                reused
            }
            None => CudaMemoryBlock {
                ptr: self.next_fresh_ptr()?,
                size,
                allocated_at: Instant::now(),
            },
        };

        // Both paths make `size` bytes live again. `free` subtracts the size,
        // so the reuse path has to add it back as well or the counter drifts
        // and later underflows.
        self.total_allocated = new_total;
        self.allocated_blocks.insert(block.ptr, block.clone());
        Some(block)
    }

    /// Returns the block identified by `ptr` to the free list for its size.
    ///
    /// Returns `true` if `ptr` was a live allocation, `false` otherwise
    /// (unknown pointer or double free); the pool is unchanged in that case.
    pub fn free(&mut self, ptr: usize) -> bool {
        match self.allocated_blocks.remove(&ptr) {
            Some(block) => {
                // saturating_sub: the fields are public, so stay panic-free
                // even if a caller has tampered with the counter.
                self.total_allocated = self.total_allocated.saturating_sub(block.size);
                self.free_blocks.entry(block.size).or_default().push(block);
                true
            }
            None => false,
        }
    }

    /// Takes a snapshot of the pool's counters.
    pub fn get_stats(&self) -> CudaMemoryStats {
        CudaMemoryStats {
            total_allocated: self.total_allocated,
            free_blocks: self.free_blocks.values().map(Vec::len).sum(),
            allocated_blocks: self.allocated_blocks.len(),
            memory_limit: self.memory_limit,
        }
    }

    /// Computes the address for a brand-new block: one past the end of the
    /// highest block currently tracked (live or free), or `BASE_ADDRESS` when
    /// the pool is empty.
    ///
    /// Deriving the address from the tracked blocks instead of from
    /// `total_allocated` (which shrinks on `free`) keeps new blocks from
    /// overlapping existing ones. Zero-sized blocks are treated as occupying
    /// one byte so that every block gets a distinct address. This scans every
    /// tracked block, so it is linear in the number of blocks.
    ///
    /// Returns `None` if the address computation would overflow `usize`.
    fn next_fresh_ptr(&self) -> Option<usize> {
        self.allocated_blocks
            .values()
            .chain(self.free_blocks.values().flatten())
            .try_fold(Self::BASE_ADDRESS, |high_water, block| {
                let end = block.ptr.checked_add(block.size.max(1))?;
                Some(high_water.max(end))
            })
    }
}
/// Snapshot of a `CudaMemoryPool`'s counters, as produced by
/// `CudaMemoryPool::get_stats`.
#[derive(Debug, Clone)]
pub struct CudaMemoryStats {
/// Copy of the pool's `total_allocated` counter.
pub total_allocated: usize,
/// Total number of blocks across all free lists.
pub free_blocks: usize,
/// Number of entries in the pool's `allocated_blocks` map.
pub allocated_blocks: usize,
/// Copy of the pool's `memory_limit`.
pub memory_limit: usize,
}