1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
//! Physical-GPU query and enumeration.
use core::ffi::c_char;
use baracuda_cuda_sys::types::CUdevice_attribute as Attr;
use baracuda_cuda_sys::{driver, CUdevice};
use crate::error::{check, Result};
use crate::init::init;
/// Handle to a single CUDA device: either a whole physical GPU or, under
/// MIG, one logical slice of a GPU.
///
/// The wrapper holds nothing but the driver's `CUdevice` value, so it is
/// `Copy` and can be passed around by value freely.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Device(pub(crate) CUdevice);
impl Device {
/// Reports how many CUDA devices the driver exposes to this process.
///
/// Calls `init()` before touching the driver, so it may be used as the
/// first CUDA call in a program.
///
/// # Errors
/// Propagates a failure from `init()`, from obtaining the driver or its
/// entry point, or a driver status that `check` rejects.
pub fn count() -> Result<u32> {
    init()?;
    let api = driver()?;
    let get_count = api.cu_device_get_count()?;
    let mut device_count: core::ffi::c_int = 0;
    // SAFETY: `device_count` is a live, writable `c_int` for the whole call.
    let status = unsafe { get_count(&mut device_count) };
    check(status)?;
    Ok(device_count as u32)
}
/// Looks up the device that sits at position `ordinal` in the driver's
/// enumeration.
///
/// Calls `init()` first, like [`Device::count`].
///
/// # Errors
/// Propagates a failure from `init()`, from obtaining the driver or its
/// entry point, or a driver status that `check` rejects.
pub fn get(ordinal: u32) -> Result<Self> {
    init()?;
    let api = driver()?;
    let device_get = api.cu_device_get()?;
    let mut handle = CUdevice::default();
    // SAFETY: `handle` is a live, writable `CUdevice` for the whole call.
    // The `as` cast is a same-width reinterpretation where `c_int` is
    // 32 bits: ordinals above `c_int::MAX` turn negative.
    // NOTE(review): presumably the driver rejects those with an
    // invalid-device status rather than misbehaving — confirm.
    let status = unsafe { device_get(&mut handle, ordinal as core::ffi::c_int) };
    check(status)?;
    Ok(Self(handle))
}
/// Collects every visible device, ordered by ascending ordinal.
///
/// # Errors
/// Fails if [`Device::count`] fails, or stops at the first ordinal for
/// which [`Device::get`] returns an error and propagates that error.
pub fn all() -> Result<Vec<Self>> {
    let total = Self::count()?;
    let mut devices = Vec::new();
    for index in 0..total {
        devices.push(Self::get(index)?);
    }
    Ok(devices)
}
/// Raw ordinal (`0`, `1`, ...).
#[inline]
pub fn ordinal(&self) -> i32 {
self.0 .0
}
/// Device name as reported by the driver, e.g. `"NVIDIA GeForce RTX 4090"`.
///
/// The driver fills a 256-byte buffer; everything before the first NUL
/// byte is decoded as UTF-8, with invalid sequences replaced lossily.
///
/// # Errors
/// Propagates a failure to obtain the driver or its entry point, or a
/// driver status that `check` rejects.
pub fn name(&self) -> Result<String> {
    let api = driver()?;
    let get_name = api.cu_device_get_name()?;
    let mut raw = [0u8; 256];
    let capacity = raw.len() as core::ffi::c_int;
    // SAFETY: `raw` is valid for writes of `capacity` bytes. The driver is
    // expected to NUL-terminate within that length; should it not, the
    // search below falls back to the whole buffer.
    let status = unsafe { get_name(raw.as_mut_ptr() as *mut c_char, capacity, self.0) };
    check(status)?;
    let name_bytes = match raw.iter().position(|&byte| byte == 0) {
        Some(end) => &raw[..end],
        None => &raw[..],
    };
    Ok(String::from_utf8_lossy(name_bytes).into_owned())
}
/// Size of this device's global memory, in bytes.
///
/// # Errors
/// Propagates a failure to obtain the driver or its entry point, or a
/// driver status that `check` rejects.
pub fn total_memory(&self) -> Result<u64> {
    let api = driver()?;
    let total_mem = api.cu_device_total_mem()?;
    let mut size: usize = 0;
    // SAFETY: `size` is a live, writable `usize`, standing in for the
    // `size_t` the driver writes.
    let status = unsafe { total_mem(&mut size, self.0) };
    check(status).map(|()| size as u64)
}
/// Compute capability as a `(major, minor)` pair, e.g. `(9, 0)` for Hopper.
///
/// Issues two attribute queries, major first; the first failure is
/// returned.
pub fn compute_capability(&self) -> Result<(u32, u32)> {
    let major = self.attribute(Attr::COMPUTE_CAPABILITY_MAJOR)? as u32;
    let minor = self.attribute(Attr::COMPUTE_CAPABILITY_MINOR)? as u32;
    Ok((major, minor))
}
/// Number of streaming multiprocessors (SMs) on this device.
pub fn multiprocessor_count(&self) -> Result<u32> {
    self.attribute(Attr::MULTIPROCESSOR_COUNT)
        .map(|sm_count| sm_count as u32)
}
/// Threads per warp (almost always 32).
pub fn warp_size(&self) -> Result<u32> {
    let lanes = self.attribute(Attr::WARP_SIZE)?;
    Ok(lanes as u32)
}
/// Reads one device attribute, identified by a `CUdevice_attribute`
/// selector. See [`baracuda_cuda_sys::types::CUdevice_attribute`] for the
/// available selectors.
///
/// # Errors
/// Propagates a failure to obtain the driver or its entry point, or a
/// driver status that `check` rejects.
pub fn attribute(&self, attr: i32) -> Result<i32> {
    let api = driver()?;
    let get_attribute = api.cu_device_get_attribute()?;
    let mut value: core::ffi::c_int = 0;
    // SAFETY: `value` is a live, writable `c_int`. `attr` comes straight
    // from the caller; the driver is expected to answer an unknown selector
    // with an error status rather than undefined behaviour.
    let status = unsafe { get_attribute(&mut value, attr, self.0) };
    check(status)?;
    Ok(value)
}
/// The raw `CUdevice` handle. Use with care.
#[inline]
pub fn as_raw(&self) -> CUdevice {
self.0
}
/// The device's UUID as 16 raw bytes.
///
/// # Errors
/// Propagates a failure to obtain the driver or its entry point, or a
/// driver status that `check` rejects.
pub fn uuid(&self) -> Result<[u8; 16]> {
    let api = driver()?;
    let get_uuid = api.cu_device_get_uuid()?;
    let mut bytes = [0u8; 16];
    // SAFETY: `bytes` is a live, writable 16-byte array.
    // NOTE(review): assumes the driver writes at most 16 bytes through this
    // pointer — confirm against the binding's parameter type.
    let status = unsafe { get_uuid(bytes.as_mut_ptr(), self.0) };
    check(status).map(|()| bytes)
}
/// The device's Windows LUID (8 bytes) together with its 32-bit
/// device-node mask.
///
/// NOTE(review): the LUID is a Windows concept; what the driver reports on
/// other platforms (zeros, or an error status) is not established by this
/// code — confirm before relying on it.
///
/// # Errors
/// Propagates a failure to obtain the driver or its entry point, or a
/// driver status that `check` rejects.
pub fn luid(&self) -> Result<([u8; 8], u32)> {
    let api = driver()?;
    let get_luid = api.cu_device_get_luid()?;
    let mut raw_luid = [0i8; 8];
    let mut node_mask: core::ffi::c_uint = 0;
    // SAFETY: `raw_luid` is a live, writable 8-byte array and `node_mask` a
    // live, writable `c_uint`.
    let status = unsafe { get_luid(raw_luid.as_mut_ptr(), &mut node_mask, self.0) };
    check(status)?;
    // Reinterpret each signed byte as unsigned; the bit pattern is kept.
    let luid_bytes = raw_luid.map(|byte| byte as u8);
    Ok((luid_bytes, node_mask))
}
/// Reads one peer-to-peer attribute for the pair (`self` as source, `peer`
/// as destination). `attr` is a constant from
/// [`baracuda_cuda_sys::types::CUdevice_P2PAttribute`].
///
/// # Errors
/// Propagates a failure to obtain the driver or its entry point, or a
/// driver status that `check` rejects.
pub fn p2p_attribute(&self, peer: &Device, attr: i32) -> Result<i32> {
    let api = driver()?;
    let get_p2p = api.cu_device_get_p2p_attribute()?;
    let mut value: core::ffi::c_int = 0;
    // SAFETY: `value` is a live, writable `c_int`; both device handles are
    // passed by value.
    let status = unsafe { get_p2p(&mut value, attr, self.0, peer.0) };
    check(status)?;
    Ok(value)
}
/// Tells whether this device supports the given exec-affinity type (e.g.
/// SM-count partitioning at context-creation time).
///
/// Returns `true` when the driver writes a non-zero value.
///
/// # Errors
/// Propagates a failure to obtain the driver or its entry point, or a
/// driver status that `check` rejects.
pub fn exec_affinity_support(&self, affinity_type: i32) -> Result<bool> {
    let api = driver()?;
    let query = api.cu_device_get_exec_affinity_support()?;
    let mut supported: core::ffi::c_int = 0;
    // SAFETY: `supported` is a live, writable `c_int`.
    let status = unsafe { query(&mut supported, affinity_type, self.0) };
    check(status).map(|()| supported != 0)
}
/// Whether this device is able to directly access allocations that live on
/// `peer`.
///
/// A `true` answer is only a capability report: the accessing side must
/// still call the matching `Context::enable_peer_access` before kernels can
/// dereference peer pointers.
///
/// # Errors
/// Propagates a failure to obtain the driver or its entry point, or a
/// driver status that `check` rejects.
pub fn can_access_peer(&self, peer: &Device) -> Result<bool> {
    let api = driver()?;
    let query = api.cu_device_can_access_peer()?;
    let mut accessible: core::ffi::c_int = 0;
    // SAFETY: `accessible` is a live, writable `c_int`; both device handles
    // are passed by value.
    let status = unsafe { query(&mut accessible, self.0, peer.0) };
    check(status)?;
    Ok(accessible != 0)
}
/// Reads the state of this device's primary context.
///
/// The result is `(flags, active)`: `flags` is the same bitmask that
/// [`crate::Context::with_flags`] accepts, and `active` is `true` when the
/// driver reports a non-zero active value, i.e. some caller currently holds
/// a retained primary-context reference.
///
/// # Errors
/// Propagates a failure to obtain the driver or its entry point, or a
/// driver status that `check` rejects.
pub fn primary_ctx_state(&self) -> Result<(u32, bool)> {
    let api = driver()?;
    let get_state = api.cu_device_primary_ctx_get_state()?;
    let mut ctx_flags: core::ffi::c_uint = 0;
    let mut is_active: core::ffi::c_int = 0;
    // SAFETY: both out-parameters are live, writable locals of the integer
    // types declared above.
    let status = unsafe { get_state(self.0, &mut ctx_flags, &mut is_active) };
    check(status)?;
    Ok((ctx_flags, is_active != 0))
}
/// Stores the flags to be used when this device's primary context is later
/// created.
///
/// # Errors
/// Propagates a failure to obtain the driver or its entry point, or a
/// driver status that `check` rejects.
/// NOTE(review): the driver presumably answers
/// `CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE` when the primary context already
/// exists (reset with `Context::reset_primary` first) — confirm for the
/// driver versions in use.
pub fn set_primary_ctx_flags(&self, flags: u32) -> Result<()> {
    let api = driver()?;
    let set_flags = api.cu_device_primary_ctx_set_flags()?;
    // SAFETY: only plain values are passed; no pointers cross the boundary.
    let status = unsafe { set_flags(self.0, flags) };
    check(status)
}
}