baracuda_runtime/
query.rs1use baracuda_cuda_sys::runtime::runtime;
6use baracuda_cuda_sys::runtime::types::{
7 cudaFuncAttributes, cudaMemoryType, cudaPointerAttributes,
8};
9
10use crate::device::Device;
11use crate::error::{check, Result};
12
/// Kind of memory a pointer refers to, as classified by the CUDA runtime.
///
/// Values are produced from the runtime's raw memory-type code by
/// `MemoryType::from_raw`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum MemoryType {
    /// Fallback used for every raw code that is not host, device or managed.
    Unregistered,
    /// The runtime reported the host memory-type code.
    Host,
    /// The runtime reported the device memory-type code.
    Device,
    /// The runtime reported the managed memory-type code.
    Managed,
}
26
27impl MemoryType {
28 #[inline]
29 fn from_raw(raw: i32) -> Self {
30 match raw {
31 cudaMemoryType::HOST => MemoryType::Host,
32 cudaMemoryType::DEVICE => MemoryType::Device,
33 cudaMemoryType::MANAGED => MemoryType::Managed,
34 _ => MemoryType::Unregistered,
35 }
36 }
37}
38
39#[derive(Copy, Clone, Debug)]
41pub struct PointerAttributes {
42 pub memory_type: MemoryType,
43 pub device: i32,
44 pub device_pointer: *mut core::ffi::c_void,
45 pub host_pointer: *mut core::ffi::c_void,
46}
47
48#[allow(clippy::not_unsafe_ptr_arg_deref)]
57pub fn pointer_attributes(ptr: *const core::ffi::c_void) -> Result<PointerAttributes> {
58 let r = runtime()?;
59 let cu = r.cuda_pointer_get_attributes()?;
60 let mut raw = cudaPointerAttributes::default();
61 check(unsafe {
62 cu(
63 &mut raw as *mut cudaPointerAttributes as *mut core::ffi::c_void,
64 ptr,
65 )
66 })?;
67 Ok(PointerAttributes {
68 memory_type: MemoryType::from_raw(raw.type_),
69 device: raw.device,
70 device_pointer: raw.device_pointer,
71 host_pointer: raw.host_pointer,
72 })
73}
74
/// Snapshot of per-device properties, as assembled by [`device_properties`].
///
/// Units are encoded in the field names (`_bytes`, `_khz`, `_bits`). See
/// [`device_properties`] for how each field is populated and which ones are
/// best-effort.
#[derive(Debug, Clone)]
pub struct DeviceProperties {
    /// Device name string.
    pub name: String,
    /// Total global memory, in bytes.
    pub total_global_memory_bytes: u64,
    /// Shared memory available per block, in bytes.
    pub shared_memory_per_block_bytes: u64,
    /// Registers available per block.
    pub regs_per_block: i32,
    /// Warp size.
    pub warp_size: i32,
    /// Maximum number of threads per block.
    pub max_threads_per_block: i32,
    /// Maximum block dimensions, ordered `[x, y, z]`.
    pub max_block_dim: [i32; 3],
    /// Maximum grid dimensions, ordered `[x, y, z]`.
    pub max_grid_dim: [i32; 3],
    /// Clock rate, in kHz.
    pub clock_rate_khz: i32,
    /// Memory clock rate, in kHz.
    pub memory_clock_rate_khz: i32,
    /// Memory bus width, in bits.
    pub memory_bus_width_bits: i32,
    /// L2 cache size, in bytes.
    pub l2_cache_size_bytes: i32,
    /// Maximum number of threads per multiprocessor.
    pub max_threads_per_sm: i32,
    /// Number of multiprocessors.
    pub multiprocessor_count: i32,
    /// Major part of the compute capability.
    pub compute_capability_major: i32,
    /// Minor part of the compute capability.
    pub compute_capability_minor: i32,
    /// `true` when the integrated attribute is non-zero.
    pub integrated: bool,
    /// `true` when the concurrent-kernels attribute is non-zero.
    pub concurrent_kernels: bool,
    /// PCI bus id.
    pub pci_bus_id: i32,
    /// PCI device id.
    pub pci_device_id: i32,
    /// PCI domain id.
    pub pci_domain_id: i32,
}
103
104pub fn device_properties(device: &Device) -> Result<DeviceProperties> {
112 use baracuda_cuda_sys::runtime::types::cudaDeviceAttr as Attr;
113
114 let r = runtime()?;
115 let cu = r.cuda_get_device_properties()?;
116 let mut buf = vec![0u8; 2048];
117 check(unsafe { cu(buf.as_mut_ptr() as *mut core::ffi::c_void, device.ordinal()) })?;
118
119 let name = unsafe {
123 let name_ptr = buf.as_ptr() as *const core::ffi::c_char;
124 core::ffi::CStr::from_ptr(name_ptr)
125 .to_string_lossy()
126 .into_owned()
127 };
128
129 let total_global_memory_bytes = {
131 let cu_info = r.cuda_mem_get_info()?;
132 let mut free: usize = 0;
133 let mut total: usize = 0;
134 check(unsafe { cu_info(&mut free, &mut total) })?;
135 total as u64
136 };
137
138 Ok(DeviceProperties {
139 name,
140 total_global_memory_bytes,
141 shared_memory_per_block_bytes: device
142 .attribute(Attr::MAX_SHARED_MEMORY_PER_BLOCK)
143 .unwrap_or(0) as u64,
144 regs_per_block: device.attribute(Attr::MAX_REGISTERS_PER_BLOCK).unwrap_or(0),
145 warp_size: device.attribute(Attr::WARP_SIZE).unwrap_or(0),
146 max_threads_per_block: device.attribute(Attr::MAX_THREADS_PER_BLOCK).unwrap_or(0),
147 max_block_dim: [
148 device.attribute(Attr::MAX_BLOCK_DIM_X).unwrap_or(0),
149 device.attribute(Attr::MAX_BLOCK_DIM_Y).unwrap_or(0),
150 device.attribute(Attr::MAX_BLOCK_DIM_Z).unwrap_or(0),
151 ],
152 max_grid_dim: [
153 device.attribute(Attr::MAX_GRID_DIM_X).unwrap_or(0),
154 device.attribute(Attr::MAX_GRID_DIM_Y).unwrap_or(0),
155 device.attribute(Attr::MAX_GRID_DIM_Z).unwrap_or(0),
156 ],
157 clock_rate_khz: device.attribute(Attr::CLOCK_RATE).unwrap_or(0),
158 memory_clock_rate_khz: device.attribute(Attr::CLOCK_RATE).unwrap_or(0),
159 memory_bus_width_bits: 0,
160 l2_cache_size_bytes: 0,
161 max_threads_per_sm: 0,
162 multiprocessor_count: device.attribute(Attr::MULTIPROCESSOR_COUNT).unwrap_or(0),
163 compute_capability_major: device
164 .attribute(Attr::COMPUTE_CAPABILITY_MAJOR)
165 .unwrap_or(0),
166 compute_capability_minor: device
167 .attribute(Attr::COMPUTE_CAPABILITY_MINOR)
168 .unwrap_or(0),
169 integrated: device.attribute(Attr::INTEGRATED).unwrap_or(0) != 0,
170 concurrent_kernels: device.attribute(Attr::CONCURRENT_KERNELS).unwrap_or(0) != 0,
171 pci_bus_id: device.attribute(Attr::PCI_BUS_ID).unwrap_or(0),
172 pci_device_id: device.attribute(Attr::PCI_DEVICE_ID).unwrap_or(0),
173 pci_domain_id: device.attribute(Attr::PCI_DOMAIN_ID).unwrap_or(0),
174 })
175}
176
177pub unsafe fn func_attributes(func_symbol: *const core::ffi::c_void) -> Result<cudaFuncAttributes> {
186 let r = runtime()?;
187 let cu = r.cuda_func_get_attributes()?;
188 let mut attrs = cudaFuncAttributes::default();
189 check(cu(
190 &mut attrs as *mut cudaFuncAttributes as *mut core::ffi::c_void,
191 func_symbol,
192 ))?;
193 Ok(attrs)
194}