scirs2_metrics/optimization/gpu_kernels/contexts.rs

#![allow(clippy::too_many_arguments)]
#![allow(dead_code)]

use super::runtime::{CudaRuntime, OpenClRuntime};
use crate::error::Result;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Instant;

/// Execution context for a CUDA device.
#[derive(Debug)]
pub struct CudaContext {
    /// CUDA device ordinal this context is bound to.
    pub _device_id: i32,
    /// Opaque handle to the underlying CUDA context.
    pub context_handle: usize,
    /// Handles of the CUDA streams owned by this context.
    pub streams: Vec<usize>,
    /// Shared pool for device memory allocations.
    pub memory_pool: Arc<Mutex<CudaMemoryPool>>,
    /// Cached properties of the underlying device.
    pub device_props: CudaDeviceProperties,
    /// Shared handle to the CUDA runtime wrapper.
    pub runtime: Arc<Mutex<CudaRuntime>>,
}

/// Properties reported for a CUDA device.
#[derive(Debug, Clone)]
pub struct CudaDeviceProperties {
    pub name: String,
    pub major: i32,
    pub minor: i32,
    pub total_global_mem: usize,
    pub shared_mem_per_block: usize,
    pub max_threads_per_block: i32,
    pub max_threads_dim: [i32; 3],
    pub max_grid_size: [i32; 3],
    pub warp_size: i32,
    pub memory_pitch: usize,
    pub max_threads_per_multiprocessor: i32,
    pub multiprocessor_count: i32,
    pub clock_rate: i32,
    pub memory_clock_rate: i32,
    pub memory_bus_width: i32,
    pub l2_cache_size: i32,
    pub texture_alignment: usize,
    pub concurrent_kernels: bool,
    pub compute_mode: i32,
    pub unified_addressing: bool,
}

/// Size-keyed pool for CUDA device memory allocations.
#[derive(Debug)]
pub struct CudaMemoryPool {
    /// Freed blocks, keyed by block size, available for reuse.
    pub free_blocks: HashMap<usize, Vec<CudaMemoryBlock>>,
    /// Blocks currently handed out, keyed by device pointer.
    pub allocated_blocks: HashMap<usize, CudaMemoryBlock>,
    /// Total bytes currently allocated to callers.
    pub total_allocated: usize,
    /// Upper bound on the bytes the pool may allocate.
    pub memory_limit: usize,
}

/// A single block of CUDA device memory.
#[derive(Debug, Clone)]
pub struct CudaMemoryBlock {
    /// Device pointer for the block.
    pub ptr: usize,
    /// Size of the block in bytes.
    pub size: usize,
    /// Time at which the block was allocated.
    pub allocated_at: Instant,
}

/// Execution context for an OpenCL device.
#[derive(Debug)]
pub struct OpenClContext {
    /// Identifier of the OpenCL platform the device belongs to.
    pub platform_id: usize,
    /// Identifier of the OpenCL device this context is bound to.
    pub _device_id: usize,
    /// Opaque handle to the underlying OpenCL context.
    pub context_handle: usize,
    /// Handle of the command queue used for kernel submission.
    pub command_queue: usize,
    /// Cache of compiled OpenCL program handles.
    pub program_cache: Arc<Mutex<HashMap<String, usize>>>,
    /// Cached information about the underlying device.
    pub device_info: OpenClDeviceInfo,
    /// Shared handle to the OpenCL runtime wrapper.
    pub runtime: Arc<Mutex<OpenClRuntime>>,
}

/// Information reported for an OpenCL device.
#[derive(Debug, Clone)]
pub struct OpenClDeviceInfo {
    pub name: String,
    pub vendor: String,
    pub version: String,
    pub profile: String,
    pub global_mem_size: usize,
    pub local_mem_size: usize,
    pub max_work_group_size: usize,
    pub max_work_item_dimensions: u32,
    pub max_work_item_sizes: Vec<usize>,
    pub max_compute_units: u32,
    pub max_clock_frequency: u32,
    pub address_bits: u32,
    pub image_support: bool,
    pub preferred_vector_width_float: u32,
    pub preferred_vector_width_double: u32,
}

impl CudaMemoryPool {
    /// Create an empty pool that may allocate at most `memory_limit` bytes.
    pub fn new(memory_limit: usize) -> Self {
        Self {
            free_blocks: HashMap::new(),
            allocated_blocks: HashMap::new(),
            total_allocated: 0,
            memory_limit,
        }
    }

    /// Allocate a block of `size` bytes, returning `None` if the pool's
    /// memory limit would be exceeded.
    pub fn allocate(&mut self, size: usize) -> Option<CudaMemoryBlock> {
        // Reject requests that would push the pool past its configured limit.
        if self.total_allocated + size > self.memory_limit {
            return None;
        }

        // Reuse a previously freed block of the same size if one is available.
        if let Some(blocks) = self.free_blocks.get_mut(&size) {
            if let Some(block) = blocks.pop() {
                // Account for the reused block, mirroring the decrement in `free`.
                self.total_allocated += block.size;
                self.allocated_blocks.insert(block.ptr, block.clone());
                return Some(block);
            }
        }

        // Otherwise create a new block with a simulated device pointer.
        let ptr = self.total_allocated + 0x10000000;
        let block = CudaMemoryBlock {
            ptr,
            size,
            allocated_at: Instant::now(),
        };

        self.total_allocated += size;
        self.allocated_blocks.insert(ptr, block.clone());
        Some(block)
    }

    /// Return the block at `ptr` to the free list. Returns `false` if the
    /// pointer is not currently allocated.
    pub fn free(&mut self, ptr: usize) -> bool {
        if let Some(block) = self.allocated_blocks.remove(&ptr) {
            self.total_allocated -= block.size;
            self.free_blocks.entry(block.size).or_default().push(block);
            true
        } else {
            false
        }
    }

    /// Snapshot of the pool's current usage statistics.
    pub fn get_stats(&self) -> CudaMemoryStats {
        CudaMemoryStats {
            total_allocated: self.total_allocated,
            free_blocks: self.free_blocks.values().map(|v| v.len()).sum(),
            allocated_blocks: self.allocated_blocks.len(),
            memory_limit: self.memory_limit,
        }
    }
}

/// Snapshot of a `CudaMemoryPool`'s usage at a point in time.
#[derive(Debug, Clone)]
pub struct CudaMemoryStats {
    /// Total bytes currently allocated to callers.
    pub total_allocated: usize,
    /// Number of freed blocks available for reuse.
    pub free_blocks: usize,
    /// Number of blocks currently allocated.
    pub allocated_blocks: usize,
    /// Configured memory limit in bytes.
    pub memory_limit: usize,
}
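
// Illustrative usage sketch for `CudaMemoryPool`: it exercises the
// allocate/free/reuse cycle and the statistics snapshot. The 1 MiB limit and
// the 4 KiB block size below are arbitrary example values, not part of the
// original module.
#[cfg(test)]
mod cuda_memory_pool_usage {
    use super::*;

    #[test]
    fn allocate_free_and_reuse() {
        // Pool limited to 1 MiB of simulated device memory.
        let mut pool = CudaMemoryPool::new(1 << 20);

        // A fresh allocation is tracked in `allocated_blocks`.
        let block = pool.allocate(4096).expect("allocation within limit");
        assert_eq!(pool.get_stats().allocated_blocks, 1);

        // Freeing moves the block to the size-keyed free list.
        assert!(pool.free(block.ptr));
        assert_eq!(pool.get_stats().free_blocks, 1);
        assert_eq!(pool.get_stats().total_allocated, 0);

        // A request of the same size is served from the free list.
        let reused = pool.allocate(4096).expect("reuse of freed block");
        assert_eq!(reused.ptr, block.ptr);

        // Requests that would exceed the configured limit are rejected.
        assert!(pool.allocate(2 << 20).is_none());
    }
}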