1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#[allow(unused_imports)]
use crate::error::ErrorCode;
use std::{mem, ptr};
use singe_cuda_sys::{driver, runtime};
use crate::{
error::{Error, Result},
try_cuda,
};
bitflags::bitflags! {
/// Flags for [`IpcMemoryHandle::open`].
///
/// The raw bits of this set are passed as the flags argument of the open call.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct IpcMemoryFlags: u32 {
/// Mirrors the driver constant `CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS`.
///
/// According to the documentation of [`IpcMemoryHandle::open`], this flag controls whether
/// opening a handle may attempt to enable peer access between devices.
// `as _` converts the driver enum value to the `u32` backing type declared above.
const LAZY_ENABLE_PEER_ACCESS = driver::CUipcMem_flags::CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS as _;
}
}
/// Thin wrapper around the runtime's raw `cudaIpcEventHandle_t`.
///
/// The wrapper only stores the raw handle value; it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct IpcEventHandle(runtime::cudaIpcEventHandle_t);

impl IpcEventHandle {
    /// Wraps a raw runtime handle without inspecting it.
    ///
    /// # Safety
    ///
    /// NOTE(review): this function performs no validation of `handle`. Presumably the caller
    /// must supply a handle that originates from the CUDA IPC API — confirm the intended
    /// contract.
    pub const unsafe fn from_raw(handle: runtime::cudaIpcEventHandle_t) -> Self {
        IpcEventHandle(handle)
    }

    /// Creates a handle whose underlying bytes are all zero.
    pub const fn zeroed() -> Self {
        // SAFETY: assumes the all-zero bit pattern is a valid `cudaIpcEventHandle_t`
        // (the binding is expected to be a plain-data C struct) — TODO confirm.
        let raw: runtime::cudaIpcEventHandle_t = unsafe { mem::zeroed() };
        Self(raw)
    }

    /// Returns a copy of the wrapped raw handle.
    ///
    /// # Safety
    ///
    /// NOTE(review): the body only copies the stored value; the reason this accessor is
    /// `unsafe` is not visible here — presumably it guards how the raw handle is used
    /// afterwards. Confirm the intended contract.
    pub const unsafe fn as_raw(&self) -> runtime::cudaIpcEventHandle_t {
        self.0
    }
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct IpcMemoryHandle(runtime::cudaIpcMemHandle_t);
impl IpcMemoryHandle {
pub const unsafe fn from_raw(handle: runtime::cudaIpcMemHandle_t) -> Self {
Self(handle)
}
pub const fn zeroed() -> Self {
unsafe { mem::zeroed() }
}
pub const unsafe fn as_raw(&self) -> runtime::cudaIpcMemHandle_t {
self.0
}
/// Maps memory exported from another process with [`DeviceMemory::ipc_handle`](crate::memory::DeviceMemory::ipc_handle) into the current device address space.
/// For contexts on different devices [`IpcMemoryHandle::open`] can attempt to enable peer access between the devices as if the user called [`Device::enable_peer_access`](crate::device::Device::enable_peer_access).
/// This behavior is controlled by [`IpcMemoryFlags::LAZY_ENABLE_PEER_ACCESS`].
/// [`Device::can_access_peer`](crate::device::Device::can_access_peer) can determine if a mapping is possible.
///
/// [`IpcMemoryHandle::open`] can open handles to devices that may not be visible in the process calling the API.
///
/// Imported memory handles from each device in a given process may only be opened by one context per device per other process.
///
/// If the memory handle has already been opened by the current context, the reference count on the handle is incremented by 1 and the existing device pointer is returned.
///
/// Memory returned from [`IpcMemoryHandle::open`] must be freed with [`OpenedIpcMemory::close`].
///
/// Calling [`DeviceMemory::free`](crate::memory::DeviceMemory::free) on an exported memory region before calling [`OpenedIpcMemory::close`] in the importing context will result in undefined behavior.
///
/// IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems.
/// IPC functionality on Windows is supported for compatibility purposes but not recommended as it comes with performance cost.
/// Users can test their device for IPC functionality through the device properties exposed by this crate, for example [`DeviceProperties::ipc_event_supported`](crate::device::DeviceProperties::ipc_event_supported).
///
/// Note:
///
/// * Note that this function may also return [`ErrorCode::NotInitialized`], [`ErrorCode::CallRequiresNewerDriver`] or [`ErrorCode::NoDevice`] if this call tries to initialize internal CUDA RT state.
/// * Note that as specified by [`Stream::add_callback`](crate::stream::Stream::add_callback) no CUDA function may be called from callback.
/// [`ErrorCode::NotPermitted`] may, but is not guaranteed to, be returned as a diagnostic in such case.
/// * No guarantees are made about the address returned.
/// In particular, multiple processes may not receive the same address for the same handle.
pub fn open<T>(self, flags: IpcMemoryFlags) -> Result<OpenedIpcMemory<T>> {
let mut dev_ptr = ptr::null_mut();
unsafe {
try_cuda!(runtime::cudaIpcOpenMemHandle(
&raw mut dev_ptr,
self.as_raw(),
flags.bits()
))?;
}
if dev_ptr.is_null() {
return Err(Error::NullHandle);
}
Ok(OpenedIpcMemory {
ptr: dev_ptr.cast(),
})
}
}
/// Device pointer obtained from [`IpcMemoryHandle::open`].
///
/// The mapping is closed either explicitly through [`OpenedIpcMemory::close`] or when the
/// value is dropped.
pub struct OpenedIpcMemory<T> {
    /// Address returned by the open call, cast to `*mut T`.
    ptr: *mut T,
}

// Hand-written instead of `#[derive(Debug)]`: the derive would add a `T: Debug` bound even
// though only the raw pointer is printed. The output format matches the derived one.
impl<T> std::fmt::Debug for OpenedIpcMemory<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("OpenedIpcMemory")
            .field("ptr", &self.ptr)
            .finish()
    }
}
impl<T> OpenedIpcMemory<T> {
    /// Returns the raw device pointer stored in this mapping.
    pub const fn as_ptr(&self) -> *mut T {
        self.ptr
    }

    /// Releases one reference to the memory returned by [`IpcMemoryHandle::open`].
    ///
    /// The reference count is decremented by 1; once it reaches 0 the memory is unmapped.
    /// Neither the original allocation in the exporting process nor imported mappings in other
    /// processes are affected.
    ///
    /// If this was the last mapping relying on resources that were set up to enable peer
    /// access, those resources are freed as well.
    ///
    /// A null pointer is treated as already closed and yields `Ok(())` without calling into
    /// the runtime.
    ///
    /// # Platform support
    ///
    /// IPC is limited to devices that support unified addressing, on Linux and Windows.
    /// On Windows it exists for compatibility only and is not recommended because of its
    /// performance cost.
    /// Device support can be queried through the device properties exposed by this crate, for
    /// example
    /// [`DeviceProperties::ipc_event_supported`](crate::device::DeviceProperties::ipc_event_supported).
    ///
    /// # Errors
    ///
    /// * Returns the result of `try_cuda!` for the `cudaIpcCloseMemHandle` call.
    /// * May also return [`ErrorCode::NotInitialized`], [`ErrorCode::CallRequiresNewerDriver`]
    ///   or [`ErrorCode::NoDevice`] if this call tries to initialize internal CUDA RT state.
    /// * As specified by [`Stream::add_callback`](crate::stream::Stream::add_callback), no CUDA
    ///   function may be called from a callback. [`ErrorCode::NotPermitted`] may, but is not
    ///   guaranteed to, be returned as a diagnostic in that case.
    pub fn close(self) -> Result<()> {
        // Suppress `Drop` so the handle is not closed a second time after this call.
        let this = mem::ManuallyDrop::new(self);
        let raw = this.ptr;
        if raw.is_null() {
            return Ok(());
        }
        // SAFETY: `raw` is non-null (checked above).
        // NOTE(review): presumably it is the address obtained in `IpcMemoryHandle::open`.
        unsafe { try_cuda!(runtime::cudaIpcCloseMemHandle(raw.cast())) }
    }
}
impl<T> Drop for OpenedIpcMemory<T> {
    /// Closes the mapping on a best-effort basis.
    ///
    /// A null pointer is skipped. A failure to close cannot be returned from `drop`, so it is
    /// only reported on stderr in builds with debug assertions enabled; use
    /// [`OpenedIpcMemory::close`] to observe the error.
    fn drop(&mut self) {
        if self.ptr.is_null() {
            return;
        }
        // SAFETY: the pointer is non-null (checked above).
        // NOTE(review): presumably it is the address obtained in `IpcMemoryHandle::open`.
        let closed = unsafe { try_cuda!(runtime::cudaIpcCloseMemHandle(self.ptr.cast())) };
        if let Err(err) = closed {
            // `cfg!` rather than `#[cfg]` keeps `err` used in every profile, which avoids an
            // `unused_variables` warning in release builds. The branch is a compile-time
            // constant, so release builds still print nothing.
            if cfg!(debug_assertions) {
                eprintln!("failed to close cuda ipc memory handle: {err}");
            }
        }
    }
}