use std::sync::Arc;
use baracuda_cuda_sys::runtime::runtime;
use baracuda_cuda_sys::runtime::types::{
cudaMemAccessDesc, cudaMemAllocationHandleType, cudaMemAllocationType, cudaMemLocation,
cudaMemLocationType, cudaMemPoolAttr, cudaMemPoolProps, cudaMemPoolPtrExportData,
cudaMemPool_t,
};
use crate::device::Device;
use crate::error::{check, Result};
use crate::stream::Stream;
/// Access level a device may be granted on a memory pool.
///
/// Each variant corresponds to one of the raw `cudaMemAccessFlags` constants
/// (`NONE`, `READ`, `READ_WRITE`); the conversion lives in the inherent impl.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessFlags {
    /// Counterpart of the raw `NONE` flag.
    None,
    /// Counterpart of the raw `READ` flag.
    Read,
    /// Counterpart of the raw `READ_WRITE` flag.
    ReadWrite,
}
impl AccessFlags {
    /// Converts this variant into the matching raw `cudaMemAccessFlags` constant.
    #[inline]
    fn raw(self) -> core::ffi::c_int {
        use baracuda_cuda_sys::runtime::types::cudaMemAccessFlags as Raw;

        match self {
            Self::None => Raw::NONE,
            Self::Read => Raw::READ,
            Self::ReadWrite => Raw::READ_WRITE,
        }
    }

    /// Converts a raw flag value back into an [`AccessFlags`] variant.
    ///
    /// `READ` is tested first, then `READ_WRITE`; every other value —
    /// including ones that match no known constant — becomes
    /// [`AccessFlags::None`].
    #[inline]
    fn from_raw(raw: core::ffi::c_int) -> Self {
        use baracuda_cuda_sys::runtime::types::cudaMemAccessFlags as Raw;

        if raw == Raw::READ {
            Self::Read
        } else if raw == Raw::READ_WRITE {
            Self::ReadWrite
        } else {
            Self::None
        }
    }
}
/// Cheaply clonable handle to a memory pool.
///
/// Cloning only bumps the reference count of the shared inner state; every
/// clone refers to the same underlying pool handle.
#[derive(Clone)]
pub struct MemoryPool {
    /// Shared state holding the raw handle and its ownership flag.
    inner: Arc<MemoryPoolInner>,
}
/// Shared state behind every [`MemoryPool`] clone.
struct MemoryPoolInner {
    /// Raw pool handle passed to the runtime entry points.
    handle: cudaMemPool_t,
    /// `true` when this wrapper is responsible for destroying `handle` on
    /// drop; `false` for handles that were merely borrowed.
    owned: bool,
}
// SAFETY: NOTE(review) — the raw `handle` field is what prevents the automatic
// `Send` impl. This is presumably sound because the runtime allows a pool
// handle to be used from any host thread; confirm against the CUDA runtime
// documentation.
unsafe impl Send for MemoryPoolInner {}
// SAFETY: NOTE(review) — the wrapper never mutates its fields after
// construction, so shared references only ever read `handle`/`owned`.
// Concurrent runtime calls on the same handle are assumed to be permitted;
// confirm against the CUDA runtime documentation.
unsafe impl Sync for MemoryPoolInner {}
impl core::fmt::Debug for MemoryPoolInner {
    /// Formats the inner state as a struct named `MemoryPool` (the public
    /// wrapper's name) with its `handle` and `owned` fields.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Exhaustive destructuring: adding a field later forces this impl to
        // be revisited.
        let Self { handle, owned } = self;

        let mut out = f.debug_struct("MemoryPool");
        out.field("handle", handle);
        out.field("owned", owned);
        out.finish()
    }
}
impl core::fmt::Debug for MemoryPool {
    /// Formats the pool by delegating to the `Debug` impl of its inner state.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Fully-qualified call: method-call syntax (`self.inner.fmt(f)`) only
        // resolves when the `Debug` trait itself is in scope, and would become
        // ambiguous if another formatting trait were imported as well.
        core::fmt::Debug::fmt(&*self.inner, f)
    }
}
impl MemoryPool {
    /// Creates a new memory pool located on `device`.
    ///
    /// The pool is requested with the `PINNED` allocation type, no handle
    /// types, a `DEVICE` location identified by `device.ordinal()`, and
    /// default values for every remaining property. The returned wrapper is
    /// marked as owning the handle, so the pool is destroyed once the last
    /// clone is dropped.
    ///
    /// # Errors
    ///
    /// Propagates a failure to obtain the runtime or to resolve the creation
    /// entry point, and any status rejected by `check`.
    pub fn new(device: &Device) -> Result<Self> {
        let rt = runtime()?;
        let create_pool = rt.cuda_mem_pool_create()?;

        let location = cudaMemLocation {
            type_: cudaMemLocationType::DEVICE,
            id: device.ordinal(),
        };
        let props = cudaMemPoolProps {
            alloc_type: cudaMemAllocationType::PINNED,
            handle_types: cudaMemAllocationHandleType::NONE,
            location,
            ..Default::default()
        };

        let mut raw_pool: cudaMemPool_t = core::ptr::null_mut();
        // SAFETY: `raw_pool` and `props` are live locals for the whole call.
        check(unsafe { create_pool(&mut raw_pool, &props) })?;

        let inner = MemoryPoolInner {
            handle: raw_pool,
            owned: true,
        };
        Ok(Self {
            inner: Arc::new(inner),
        })
    }

    /// Wraps an existing pool handle without taking ownership of it.
    ///
    /// The wrapper is marked as non-owning, so dropping it never destroys
    /// `handle`.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably `handle` must be a valid pool handle that
    /// stays alive for as long as the returned wrapper (or any clone) is
    /// used — confirm the exact contract with the callers.
    pub unsafe fn from_borrowed(handle: cudaMemPool_t) -> Self {
        let inner = MemoryPoolInner {
            handle,
            owned: false,
        };
        Self {
            inner: Arc::new(inner),
        }
    }

    /// Returns the raw pool handle.
    #[inline]
    pub fn as_raw(&self) -> cudaMemPool_t {
        self.inner.handle
    }

    /// Sets the pool's `RELEASE_THRESHOLD` attribute to `bytes`.
    ///
    /// # Errors
    ///
    /// Propagates runtime/entry-point lookup failures and any status
    /// rejected by `check`.
    pub fn set_release_threshold(&self, bytes: u64) -> Result<()> {
        let rt = runtime()?;
        let set_attribute = rt.cuda_mem_pool_set_attribute()?;

        // The attribute value is passed by pointer, so it needs a mutable
        // local to point at.
        let mut threshold: u64 = bytes;
        let value_ptr = (&mut threshold as *mut u64).cast::<core::ffi::c_void>();

        // SAFETY: `value_ptr` points at `threshold`, which outlives the call.
        check(unsafe {
            set_attribute(
                self.as_raw(),
                cudaMemPoolAttr::RELEASE_THRESHOLD,
                value_ptr,
            )
        })
    }

    /// Returns the pool's `RELEASE_THRESHOLD` attribute.
    pub fn release_threshold(&self) -> Result<u64> {
        self.get_u64_attr(cudaMemPoolAttr::RELEASE_THRESHOLD)
    }

    /// Returns the pool's `USED_MEM_CURRENT` attribute.
    pub fn used_bytes(&self) -> Result<u64> {
        self.get_u64_attr(cudaMemPoolAttr::USED_MEM_CURRENT)
    }

    /// Returns the pool's `RESERVED_MEM_CURRENT` attribute.
    pub fn reserved_bytes(&self) -> Result<u64> {
        self.get_u64_attr(cudaMemPoolAttr::RESERVED_MEM_CURRENT)
    }

    /// Reads the pool attribute `attr` into a `u64`.
    ///
    /// The value starts at zero and is filled in by the runtime through a
    /// pointer; callers are expected to pass only attributes whose value is
    /// 64 bits wide.
    fn get_u64_attr(&self, attr: i32) -> Result<u64> {
        let rt = runtime()?;
        let get_attribute = rt.cuda_mem_pool_get_attribute()?;

        let mut value: u64 = 0;
        let value_ptr = (&mut value as *mut u64).cast::<core::ffi::c_void>();

        // SAFETY: `value_ptr` points at `value`, which outlives the call.
        check(unsafe { get_attribute(self.as_raw(), attr, value_ptr) })?;
        Ok(value)
    }

    /// Asks the runtime to trim the pool, passing `min_bytes_to_keep` through
    /// unchanged.
    ///
    /// # Errors
    ///
    /// Propagates runtime/entry-point lookup failures and any status
    /// rejected by `check`.
    pub fn trim_to(&self, min_bytes_to_keep: usize) -> Result<()> {
        let rt = runtime()?;
        let trim = rt.cuda_mem_pool_trim_to()?;
        // SAFETY: only the pool handle and a plain integer are passed.
        check(unsafe { trim(self.as_raw(), min_bytes_to_keep) })
    }

    /// Sets the access `flags` that `device` has on this pool.
    ///
    /// A single access descriptor (location type `DEVICE`, id
    /// `device.ordinal()`) is submitted.
    ///
    /// # Errors
    ///
    /// Propagates runtime/entry-point lookup failures and any status
    /// rejected by `check`.
    pub fn set_access(&self, device: &Device, flags: AccessFlags) -> Result<()> {
        let rt = runtime()?;
        let set_access = rt.cuda_mem_pool_set_access()?;

        let location = cudaMemLocation {
            type_: cudaMemLocationType::DEVICE,
            id: device.ordinal(),
        };
        let descriptor = cudaMemAccessDesc {
            location,
            flags: flags.raw(),
        };

        // SAFETY: `descriptor` is a live local and the count of 1 matches the
        // single descriptor passed.
        check(unsafe { set_access(self.as_raw(), &descriptor, 1) })
    }

    /// Queries the access flags `device` currently has on this pool.
    ///
    /// Raw values that match neither `READ` nor `READ_WRITE` are reported as
    /// [`AccessFlags::None`].
    ///
    /// # Errors
    ///
    /// Propagates runtime/entry-point lookup failures and any status
    /// rejected by `check`.
    pub fn access(&self, device: &Device) -> Result<AccessFlags> {
        let rt = runtime()?;
        let get_access = rt.cuda_mem_pool_get_access()?;

        let mut location = cudaMemLocation {
            type_: cudaMemLocationType::DEVICE,
            id: device.ordinal(),
        };
        let mut raw_flags: core::ffi::c_int = 0;

        // SAFETY: both out/in pointers refer to live locals.
        check(unsafe { get_access(&mut raw_flags, self.as_raw(), &mut location) })?;
        Ok(AccessFlags::from_raw(raw_flags))
    }

    /// Requests `bytes` of memory from this pool on `stream` and returns the
    /// resulting raw pointer.
    ///
    /// # Errors
    ///
    /// Propagates runtime/entry-point lookup failures and any status
    /// rejected by `check`.
    pub fn alloc_async(&self, bytes: usize, stream: &Stream) -> Result<*mut core::ffi::c_void> {
        let rt = runtime()?;
        let malloc_from_pool = rt.cuda_malloc_from_pool_async()?;

        let mut allocation: *mut core::ffi::c_void = core::ptr::null_mut();
        // SAFETY: `allocation` is a live local used as the out-pointer.
        check(unsafe { malloc_from_pool(&mut allocation, bytes, self.as_raw(), stream.as_raw()) })?;
        Ok(allocation)
    }

    /// Frees `ptr` on `stream` through the runtime's asynchronous free entry
    /// point.
    ///
    /// The pool handle itself is not consulted; only `ptr` and the stream are
    /// forwarded.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably `ptr` must be an allocation that is valid to
    /// release with the asynchronous free call and must not be used after
    /// the free has been ordered on `stream` — confirm against the CUDA
    /// runtime documentation.
    ///
    /// # Errors
    ///
    /// Propagates runtime/entry-point lookup failures and any status
    /// rejected by `check`.
    pub unsafe fn free_async(&self, ptr: *mut core::ffi::c_void, stream: &Stream) -> Result<()> {
        let rt = runtime()?;
        let free = rt.cuda_free_async()?;
        // SAFETY: the caller vouches for `ptr` (see `# Safety`).
        check(unsafe { free(ptr, stream.as_raw()) })
    }

    /// Produces export data for `ptr`, starting from a default-initialised
    /// `cudaMemPoolPtrExportData` that the runtime fills in.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably `ptr` must refer to an allocation that is
    /// eligible for export — confirm against the CUDA runtime documentation.
    ///
    /// # Errors
    ///
    /// Propagates runtime/entry-point lookup failures and any status
    /// rejected by `check`.
    pub unsafe fn export_pointer(
        &self,
        ptr: *mut core::ffi::c_void,
    ) -> Result<cudaMemPoolPtrExportData> {
        let rt = runtime()?;
        let export = rt.cuda_mem_pool_export_pointer()?;

        let mut export_data = cudaMemPoolPtrExportData::default();
        // SAFETY: `export_data` is a live local; the caller vouches for `ptr`.
        check(unsafe { export(&mut export_data, ptr) })?;
        Ok(export_data)
    }

    /// Imports a pointer described by `data` into this pool and returns it.
    ///
    /// # Errors
    ///
    /// Propagates runtime/entry-point lookup failures and any status
    /// rejected by `check`.
    pub fn import_pointer(
        &self,
        mut data: cudaMemPoolPtrExportData,
    ) -> Result<*mut core::ffi::c_void> {
        let rt = runtime()?;
        let import = rt.cuda_mem_pool_import_pointer()?;

        let mut imported: *mut core::ffi::c_void = core::ptr::null_mut();
        // SAFETY: `imported` and `data` are live locals for the whole call.
        check(unsafe { import(&mut imported, self.as_raw(), &mut data) })?;
        Ok(imported)
    }
}
impl Drop for MemoryPoolInner {
    /// Destroys the pool handle if this wrapper owns it.
    ///
    /// Borrowed or null handles are left untouched. Destruction is
    /// best-effort: if the runtime or the destroy entry point is unavailable,
    /// or the destroy call reports a failure, the problem is ignored because
    /// `drop` has no way to report it.
    fn drop(&mut self) {
        let must_destroy = self.owned && !self.handle.is_null();
        if !must_destroy {
            return;
        }

        let rt = match runtime() {
            Ok(rt) => rt,
            Err(_) => return,
        };
        let destroy = match rt.cuda_mem_pool_destroy() {
            Ok(destroy) => destroy,
            Err(_) => return,
        };

        // SAFETY: the handle is non-null and owned by this wrapper, and this
        // is the only place that destroys it.
        let _ = unsafe { destroy(self.handle) };
    }
}
/// Returns the default memory pool of `device` as a non-owning [`MemoryPool`].
///
/// The handle is wrapped as borrowed, so dropping the returned value never
/// destroys the pool.
///
/// # Errors
///
/// Propagates a failure to obtain the runtime or to resolve the entry point,
/// and any status rejected by `check`.
pub fn default_pool(device: &Device) -> Result<MemoryPool> {
    let rt = runtime()?;
    let get_default_pool = rt.cuda_device_get_default_mem_pool()?;

    let ordinal = device.ordinal();
    let mut raw_pool: cudaMemPool_t = core::ptr::null_mut();
    // SAFETY: `raw_pool` is a live local used as the out-pointer.
    check(unsafe { get_default_pool(&mut raw_pool, ordinal) })?;

    // SAFETY: the handle comes from a call that just succeeded and is wrapped
    // without ownership, so this wrapper never destroys it.
    let pool = unsafe { MemoryPool::from_borrowed(raw_pool) };
    Ok(pool)
}
/// Returns the memory pool currently set for `device` as a non-owning
/// [`MemoryPool`].
///
/// The handle is wrapped as borrowed, so dropping the returned value never
/// destroys the pool.
///
/// # Errors
///
/// Propagates a failure to obtain the runtime or to resolve the entry point,
/// and any status rejected by `check`.
pub fn current_pool(device: &Device) -> Result<MemoryPool> {
    let rt = runtime()?;
    let get_pool = rt.cuda_device_get_mem_pool()?;

    let ordinal = device.ordinal();
    let mut raw_pool: cudaMemPool_t = core::ptr::null_mut();
    // SAFETY: `raw_pool` is a live local used as the out-pointer.
    check(unsafe { get_pool(&mut raw_pool, ordinal) })?;

    // SAFETY: the handle comes from a call that just succeeded and is wrapped
    // without ownership, so this wrapper never destroys it.
    let pool = unsafe { MemoryPool::from_borrowed(raw_pool) };
    Ok(pool)
}
/// Makes `pool` the current memory pool of `device`.
///
/// # Errors
///
/// Propagates a failure to obtain the runtime or to resolve the entry point,
/// and any status rejected by `check`.
pub fn set_current_pool(device: &Device, pool: &MemoryPool) -> Result<()> {
    let rt = runtime()?;
    let set_pool = rt.cuda_device_set_mem_pool()?;

    let ordinal = device.ordinal();
    let raw_pool = pool.as_raw();
    // SAFETY: only a device ordinal and the pool's raw handle are passed.
    check(unsafe { set_pool(ordinal, raw_pool) })
}