1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
//! Device enumeration + queries via the Runtime API.
//!
//! Unlike [`baracuda_driver::Device`], a [`Device`] in the Runtime API is
//! just an ordinal — there's no separate `CUdevice` handle. Contexts are
//! implicit (the "primary context" per device) and are shared with the
//! Driver API on the same device.
use baracuda_cuda_sys::runtime::{runtime, types::cudaDeviceAttr as Attr};
use crate::error::{check, Result};
/// Runtime API view of a CUDA device: a thin wrapper around the device
/// ordinal, with no additional handle attached.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Device(pub(crate) i32);
impl Device {
/// Number of CUDA devices visible to the process.
pub fn count() -> Result<u32> {
let r = runtime()?;
let cu = r.cuda_get_device_count()?;
let mut n: core::ffi::c_int = 0;
check(unsafe { cu(&mut n) })?;
Ok(n as u32)
}
/// Construct a `Device` for the given ordinal. Does not validate — use
/// [`Device::all`] if you want a checked enumeration.
#[inline]
pub const fn from_ordinal(ordinal: u32) -> Self {
Self(ordinal as i32)
}
/// All visible devices, in ordinal order.
pub fn all() -> Result<Vec<Self>> {
let count = Self::count()?;
Ok((0..count).map(Self::from_ordinal).collect())
}
/// Set this device as current on the calling thread. Subsequent Runtime
/// API calls (allocations, launches, ...) operate on this device.
pub fn set_current(&self) -> Result<()> {
let r = runtime()?;
let cu = r.cuda_set_device()?;
check(unsafe { cu(self.0) })
}
/// Retrieve the device currently selected on the calling thread.
pub fn current() -> Result<Self> {
let r = runtime()?;
let cu = r.cuda_get_device()?;
let mut dev: core::ffi::c_int = 0;
check(unsafe { cu(&mut dev) })?;
Ok(Self(dev))
}
/// Ordinal of this device (`0`, `1`, ...).
#[inline]
pub fn ordinal(&self) -> i32 {
self.0
}
/// Compute capability as `(major, minor)`.
pub fn compute_capability(&self) -> Result<(u32, u32)> {
Ok((
self.attribute(Attr::COMPUTE_CAPABILITY_MAJOR)? as u32,
self.attribute(Attr::COMPUTE_CAPABILITY_MINOR)? as u32,
))
}
/// Multiprocessor count.
pub fn multiprocessor_count(&self) -> Result<u32> {
Ok(self.attribute(Attr::MULTIPROCESSOR_COUNT)? as u32)
}
/// Warp size in threads.
pub fn warp_size(&self) -> Result<u32> {
Ok(self.attribute(Attr::WARP_SIZE)? as u32)
}
/// Raw device-attribute query. See [`baracuda_cuda_sys::runtime::types::cudaDeviceAttr`].
pub fn attribute(&self, attr: i32) -> Result<i32> {
let r = runtime()?;
let cu = r.cuda_device_get_attribute()?;
let mut val: core::ffi::c_int = 0;
check(unsafe { cu(&mut val, attr, self.0) })?;
Ok(val)
}
/// `true` if this device can peer-access `peer`'s allocations (P2P).
/// Call [`Device::enable_peer_access`] before actually using peer
/// pointers in kernels.
pub fn can_access_peer(&self, peer: &Device) -> Result<bool> {
let r = runtime()?;
let cu = r.cuda_device_can_access_peer()?;
let mut v: core::ffi::c_int = 0;
check(unsafe { cu(&mut v, self.0, peer.0) })?;
Ok(v != 0)
}
/// Enable peer access from the *current* device to `peer`'s
/// allocations. Call `Device::set_current()` on the accessing device
/// first.
pub fn enable_peer_access(peer: &Device) -> Result<()> {
let r = runtime()?;
let cu = r.cuda_device_enable_peer_access()?;
check(unsafe { cu(peer.0, 0) })
}
/// Disable peer access previously enabled via
/// [`Device::enable_peer_access`].
pub fn disable_peer_access(peer: &Device) -> Result<()> {
let r = runtime()?;
let cu = r.cuda_device_disable_peer_access()?;
check(unsafe { cu(peer.0) })
}
}