Skip to main content

singe_cuda/
ipc.rs

1use std::{mem, ptr};
2
3use singe_cuda_sys::{driver, runtime};
4
5use crate::{
6    error::{Error, Result},
7    try_ffi,
8};
9
bitflags::bitflags! {
    /// Flags for [`IpcMemoryHandle::create_mapping`].
    ///
    /// The set is stored as a `u32`; `create_mapping` forwards the raw bits
    /// unchanged to `cudaIpcOpenMemHandle`.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct IpcMemoryFlags: u32 {
        /// Mirrors the driver constant `CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS`.
        ///
        /// According to the documentation of
        /// [`IpcMemoryHandle::create_mapping`], this flag controls whether
        /// opening a handle may attempt to enable peer access between devices.
        // NOTE(review): the value is taken from the *driver* enum but is handed
        // to a *runtime* API call; presumably both APIs use the same numeric
        // value for this flag -- confirm against the bindings.
        const LAZY_ENABLE_PEER_ACCESS = driver::CUipcMem_flags::CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS as _;
    }
}
17
18#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
19pub struct IpcEventHandle(runtime::cudaIpcEventHandle_t);
20
21impl IpcEventHandle {
22    pub const unsafe fn from_raw(handle: runtime::cudaIpcEventHandle_t) -> Self {
23        Self(handle)
24    }
25
26    pub const fn zeroed() -> Self {
27        unsafe { mem::zeroed() }
28    }
29
30    pub const fn as_raw(&self) -> runtime::cudaIpcEventHandle_t {
31        self.0
32    }
33}
34
35#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
36pub struct IpcMemoryHandle(runtime::cudaIpcMemHandle_t);
37
38impl IpcMemoryHandle {
39    pub const unsafe fn from_raw(handle: runtime::cudaIpcMemHandle_t) -> Self {
40        Self(handle)
41    }
42
43    pub const fn zeroed() -> Self {
44        unsafe { mem::zeroed() }
45    }
46
47    pub const fn as_raw(&self) -> runtime::cudaIpcMemHandle_t {
48        self.0
49    }
50
51    /// Maps memory exported from another process with [`DeviceMemory::ipc_handle`](crate::memory::DeviceMemory::ipc_handle) into the current device address space.
52    /// For contexts on different devices, [`IpcMemoryHandle::create_mapping`] can attempt to enable peer access between the devices as if [`Device::enable_peer_access`](crate::device::Device::enable_peer_access) had been called.
53    /// This behavior is controlled by [`IpcMemoryFlags::LAZY_ENABLE_PEER_ACCESS`].
54    /// [`Device::can_access_peer`](crate::device::Device::can_access_peer) can determine if a mapping is possible.
55    ///
56    /// [`IpcMemoryHandle::create_mapping`] can open handles to devices that may not be visible in the current process.
57    ///
58    /// Imported memory handles from each device in a given process may only be opened by one context per device per other process.
59    ///
60    /// If the memory handle has already been opened by the current context, the reference count on the handle is incremented by 1 and the existing device pointer is returned.
61    ///
62    /// Memory returned from [`IpcMemoryHandle::create_mapping`] must be freed with [`OpenedIpcMemory::close`].
63    ///
64    /// Calling [`DeviceMemory::free`](crate::memory::DeviceMemory::free) on an
65    /// exported memory region before calling [`OpenedIpcMemory::close`] in the
66    /// importing context results in undefined behavior.
67    ///
68    /// IPC is restricted to devices with support for unified addressing on Linux and Windows operating systems.
69    /// IPC on Windows is supported for compatibility, but is not recommended
70    /// because it has a performance cost.
71    /// Check device IPC support through the device properties exposed by this crate, for example [`DeviceProperties::ipc_event_supported`](crate::device::DeviceProperties::ipc_event_supported).
72    ///
73    /// Additional CUDA diagnostics:
74    ///
75    /// * This call may also return [`crate::error::Status::NotInitialized`], [`crate::error::Status::CallRequiresNewerDriver`], or [`crate::error::Status::NoDevice`] if it initializes internal CUDA runtime state.
76    /// * Callbacks must not call CUDA functions; see [`Stream::add_callback`](crate::stream::Stream::add_callback).
77    ///   [`crate::error::Status::NotPermitted`] may, but is not guaranteed to, be returned as a diagnostic in that case.
78    /// * No guarantees are made about the address returned.
79    ///   In particular, multiple processes may not receive the same address for the same handle.
80    ///
81    /// # Errors
82    ///
83    /// Returns an error if CUDA Runtime cannot open the IPC handle, if the
84    /// current device cannot access the allocation, or if CUDA returns a null
85    /// mapped pointer.
86    pub fn create_mapping<T>(self, flags: IpcMemoryFlags) -> Result<OpenedIpcMemory<T>> {
87        let mut dev_ptr = ptr::null_mut();
88        unsafe {
89            try_ffi!(runtime::cudaIpcOpenMemHandle(
90                &raw mut dev_ptr,
91                self.as_raw(),
92                flags.bits()
93            ))?;
94        }
95        if dev_ptr.is_null() {
96            return Err(Error::NullHandle);
97        }
98
99        Ok(OpenedIpcMemory {
100            ptr: dev_ptr.cast(),
101        })
102    }
103}
104
105#[derive(Debug)]
106pub struct OpenedIpcMemory<T> {
107    ptr: *mut T,
108}
109
110impl<T> OpenedIpcMemory<T> {
111    pub const fn as_ptr(&self) -> *mut T {
112        self.ptr
113    }
114
115    /// Decrements the reference count of the memory returned by [`IpcMemoryHandle::create_mapping`] by 1.
116    /// When the reference count reaches 0, this unmaps the memory.
117    /// The original allocation in the exporting process and imported mappings
118    /// in other processes are unaffected.
119    ///
120    /// Resources used to enable peer access are freed if this is the last
121    /// mapping using them.
122    ///
123    /// IPC is restricted to devices with support for unified addressing on Linux and Windows operating systems.
124    /// IPC on Windows is supported for compatibility, but is not recommended
125    /// because it has a performance cost.
126    /// Check device IPC support through the device properties exposed by this crate, for example [`DeviceProperties::ipc_event_supported`](crate::device::DeviceProperties::ipc_event_supported).
127    ///
128    /// Additional CUDA diagnostics:
129    ///
130    /// * This call may also return [`crate::error::Status::NotInitialized`], [`crate::error::Status::CallRequiresNewerDriver`], or [`crate::error::Status::NoDevice`] if it initializes internal CUDA runtime state.
131    /// * Callbacks must not call CUDA functions; see [`Stream::add_callback`](crate::stream::Stream::add_callback).
132    ///   [`crate::error::Status::NotPermitted`] may, but is not guaranteed to, be returned as a diagnostic in that case.
133    ///
134    /// # Errors
135    ///
136    /// Returns an error if CUDA Runtime cannot close the imported mapping.
137    pub fn close(self) -> Result<()> {
138        let ptr: *mut () = self.ptr.cast();
139        mem::forget(self);
140        if ptr.is_null() {
141            return Ok(());
142        }
143        unsafe { try_ffi!(runtime::cudaIpcCloseMemHandle(ptr as _)) }
144    }
145}
146
147impl<T> Drop for OpenedIpcMemory<T> {
148    fn drop(&mut self) {
149        if self.ptr.is_null() {
150            return;
151        }
152
153        unsafe {
154            if let Err(err) = try_ffi!(runtime::cudaIpcCloseMemHandle(self.ptr.cast())) {
155                #[cfg(debug_assertions)]
156                eprintln!("failed to close cuda ipc memory handle: {err}");
157            }
158        }
159    }
160}