use super::*;
use std::ffi::c_void;
/// Query the CUDA driver for the memory type backing `ptr`.
///
/// Falls back to `MemoryType::Host` when the driver does not recognise the
/// pointer, i.e. ordinary pageable host memory that CUDA never saw.
fn memory_type<T>(ptr: *const T) -> MemoryType {
    match get_attr(ptr, CUpointer_attribute::CU_POINTER_ATTRIBUTE_MEMORY_TYPE) {
        Ok(CUmemorytype_enum::CU_MEMORYTYPE_HOST) => MemoryType::PageLocked,
        Ok(CUmemorytype_enum::CU_MEMORYTYPE_DEVICE) => MemoryType::Device,
        Ok(CUmemorytype_enum::CU_MEMORYTYPE_ARRAY) => MemoryType::Array,
        Ok(CUmemorytype_enum::CU_MEMORYTYPE_UNIFIED) => {
            // Fixed typo/grammar in the panic message ("never be UNIFED").
            unreachable!("CU_POINTER_ATTRIBUTE_MEMORY_TYPE is never UNIFIED")
        }
        // The driver errors out on pointers it does not manage; treat those
        // as plain (pageable) host memory.
        Err(_) => MemoryType::Host,
    }
}
impl<T: Scalar> Memory for [T] {
    type Elem = T;

    /// Pointer to the first element of the slice.
    fn head_addr(&self) -> *const T {
        <[T]>::as_ptr(self)
    }

    /// Mutable pointer to the first element of the slice.
    fn head_addr_mut(&mut self) -> *mut T {
        <[T]>::as_mut_ptr(self)
    }

    /// Number of elements (not bytes) in the slice.
    fn num_elem(&self) -> usize {
        <[T]>::len(self)
    }

    /// Ask the CUDA driver what kind of memory backs this slice.
    fn memory_type(&self) -> MemoryType {
        memory_type(self.head_addr())
    }
}
impl<T: Scalar> Memcpy<PageLockedMemory<T>> for [T] {
    /// Copy the contents of page-locked host memory into this slice.
    ///
    /// Dispatches on where `self` actually lives: host-side destinations use
    /// a plain slice copy, device-side destinations use `cuMemcpyHtoD`.
    ///
    /// # Panics
    /// - if `self` and `src` share the same head address or differ in length
    /// - if the underlying CUDA memcpy fails
    fn copy_from(&mut self, src: &PageLockedMemory<T>) {
        assert_ne!(self.head_addr(), src.head_addr());
        assert_eq!(self.num_elem(), src.num_elem());
        match self.memory_type() {
            // BUG FIX: this arm previously called `self.copy_from(src)`,
            // which re-enters this very method and recurses forever.
            // `src` derefs to `&[T]`, so the slice-to-slice copy applies.
            MemoryType::Host | MemoryType::PageLocked => self.copy_from_slice(src),
            MemoryType::Device => unsafe {
                contexted_call!(
                    &src.get_context(),
                    cuMemcpyHtoD_v2,
                    self.head_addr_mut() as CUdeviceptr,
                    src.as_ptr() as *mut c_void,
                    self.num_elem() * T::size_of()
                )
            }
            .expect("memcpy from Page-locked memory to Device failed"),
            MemoryType::Array => {
                unimplemented!("Dynamical cast from slice to CUDA Array is not supported yet")
            }
        }
    }
}
impl<T: Scalar> Memcpy<[T]> for PageLockedMemory<T> {
    /// Fill this page-locked buffer from a slice that may live on the host
    /// or on the device.
    ///
    /// # Panics
    /// - if `self` and `src` alias or differ in length
    /// - if the underlying CUDA memcpy fails
    fn copy_from(&mut self, src: &[T]) {
        assert_ne!(self.head_addr(), src.head_addr());
        assert_eq!(self.num_elem(), src.num_elem());
        let byte_count = self.num_elem() * T::size_of();
        match src.memory_type() {
            // Host-resident source: an ordinary slice copy suffices.
            MemoryType::Host | MemoryType::PageLocked => self.copy_from_slice(src),
            // Device-resident source: pull the data down with cuMemcpyDtoH.
            MemoryType::Device => unsafe {
                contexted_call!(
                    &self.get_context(),
                    cuMemcpyDtoH_v2,
                    self.head_addr_mut() as *mut c_void,
                    src.as_ptr() as CUdeviceptr,
                    byte_count
                )
            }
            .expect("memcpy from Device to Page-locked memory failed"),
            MemoryType::Array => unreachable!("Array cannot be casted to a slice"),
        }
    }
}
impl<T: Scalar> Memcpy<RegisteredMemory<'_, T>> for [T] {
    /// Copy the contents of registered host memory into this slice.
    ///
    /// Dispatches on where `self` actually lives: host-side destinations use
    /// a plain slice copy, device-side destinations use `cuMemcpyHtoD`.
    ///
    /// # Panics
    /// - if `self` and `src` share the same head address or differ in length
    /// - if the underlying CUDA memcpy fails
    fn copy_from(&mut self, src: &RegisteredMemory<'_, T>) {
        assert_ne!(self.head_addr(), src.head_addr());
        assert_eq!(self.num_elem(), src.num_elem());
        match self.memory_type() {
            // BUG FIX: this arm previously called `self.copy_from(src)`,
            // which re-enters this very method and recurses forever.
            // `src` derefs to `&[T]`, so the slice-to-slice copy applies.
            MemoryType::Host | MemoryType::PageLocked => self.copy_from_slice(src),
            MemoryType::Device => unsafe {
                contexted_call!(
                    &src.get_context(),
                    cuMemcpyHtoD_v2,
                    self.head_addr_mut() as CUdeviceptr,
                    src.as_ptr() as *mut c_void,
                    self.num_elem() * T::size_of()
                )
            }
            .expect("memcpy from registered host memory to Device failed"),
            MemoryType::Array => {
                unimplemented!("Dynamical cast from slice to CUDA Array is not supported yet")
            }
        }
    }
}
impl<T: Scalar> Memcpy<[T]> for RegisteredMemory<'_, T> {
    /// Fill this registered host buffer from a slice that may live on the
    /// host or on the device.
    ///
    /// # Panics
    /// - if `self` and `src` alias or differ in length
    /// - if the underlying CUDA memcpy fails
    fn copy_from(&mut self, src: &[T]) {
        assert_ne!(self.head_addr(), src.head_addr());
        assert_eq!(self.num_elem(), src.num_elem());
        let byte_count = self.num_elem() * T::size_of();
        match src.memory_type() {
            // Host-resident source: an ordinary slice copy suffices.
            MemoryType::Host | MemoryType::PageLocked => self.copy_from_slice(src),
            // Device-resident source: pull the data down with cuMemcpyDtoH.
            MemoryType::Device => unsafe {
                contexted_call!(
                    &self.get_context(),
                    cuMemcpyDtoH_v2,
                    self.head_addr_mut() as *mut c_void,
                    src.as_ptr() as CUdeviceptr,
                    byte_count
                )
            }
            .expect("memcpy from Device to registered host memory failed"),
            MemoryType::Array => unreachable!("Array cannot be casted to a slice"),
        }
    }
}
impl<T: Scalar> Memcpy<DeviceMemory<T>> for [T] {
    /// Copy the contents of device memory into this slice.
    ///
    /// Dispatches on where `self` actually lives: host-side destinations use
    /// `cuMemcpyDtoH`, device-side destinations use `cuMemcpyDtoD`.
    ///
    /// # Panics
    /// - if `self` and `src` share the same head address or differ in length
    /// - if the underlying CUDA memcpy fails
    fn copy_from(&mut self, src: &DeviceMemory<T>) {
        assert_ne!(self.head_addr(), src.head_addr());
        assert_eq!(self.num_elem(), src.num_elem());
        match self.memory_type() {
            MemoryType::Host | MemoryType::PageLocked => unsafe {
                contexted_call!(
                    &src.get_context(),
                    cuMemcpyDtoH_v2,
                    self.head_addr_mut() as *mut c_void,
                    src.as_ptr() as CUdeviceptr,
                    self.num_elem() * T::size_of()
                )
            }
            .expect("memcpy from Device to Host memory failed"),
            MemoryType::Device => unsafe {
                contexted_call!(
                    &src.get_context(),
                    cuMemcpyDtoD_v2,
                    self.head_addr_mut() as CUdeviceptr,
                    src.as_ptr() as CUdeviceptr,
                    self.num_elem() * T::size_of()
                )
            }
            // BUG FIX: this arm is a device-to-device copy, but the old
            // message claimed "Page-locked memory to Device".
            .expect("memcpy from Device to Device failed"),
            MemoryType::Array => {
                unimplemented!("Dynamical cast from slice to CUDA Array is not supported yet")
            }
        }
    }
}
impl<T: Scalar> Memcpy<[T]> for DeviceMemory<T> {
    /// Fill this device buffer from a slice that may live on the host or on
    /// the device.
    ///
    /// # Panics
    /// - if `self` and `src` share the same head address or differ in length
    /// - if the underlying CUDA memcpy fails
    fn copy_from(&mut self, src: &[T]) {
        assert_ne!(self.head_addr(), src.head_addr());
        assert_eq!(self.num_elem(), src.num_elem());
        match src.memory_type() {
            MemoryType::Host | MemoryType::PageLocked => unsafe {
                contexted_call!(
                    &self.get_context(),
                    cuMemcpyHtoD_v2,
                    self.head_addr_mut() as CUdeviceptr,
                    src.as_ptr() as *mut c_void,
                    self.num_elem() * T::size_of()
                )
            }
            // BUG FIX: the source here may be any host memory (pageable or
            // page-locked); the old message claimed it was page-locked.
            .expect("memcpy from Host memory to Device failed"),
            MemoryType::Device => unsafe {
                contexted_call!(
                    &self.get_context(),
                    cuMemcpyDtoD_v2,
                    self.head_addr_mut() as CUdeviceptr,
                    src.as_ptr() as CUdeviceptr,
                    self.num_elem() * T::size_of()
                )
            }
            // BUG FIX: this arm is a device-to-device copy, but the old
            // message claimed "Device to Page-locked memory".
            .expect("memcpy from Device to Device failed"),
            MemoryType::Array => unreachable!("Array cannot be casted to a slice"),
        }
    }
}
impl<T: Scalar> Continuous for [T] {
    /// A slice is already contiguous; expose it unchanged.
    fn as_slice(&self) -> &[Self::Elem] {
        &self[..]
    }

    /// Mutable view of the same contiguous storage.
    fn as_mut_slice(&mut self) -> &mut [Self::Elem] {
        &mut self[..]
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain `Vec` is classified as ordinary (pageable) host memory.
    #[test]
    fn memory_type_host_vec() -> error::Result<()> {
        let buf = vec![0_u32; 12];
        let slice = buf.as_slice();
        assert_eq!(slice.memory_type(), MemoryType::Host);
        assert_eq!(slice.num_elem(), 12);
        Ok(())
    }

    /// The classification must not change just because a CUDA context exists.
    #[test]
    fn memory_type_host_vec_with_context() -> error::Result<()> {
        let device = Device::nth(0)?;
        let _ctx = device.create_context();
        let buf = vec![0_u32; 12];
        let slice = buf.as_slice();
        assert_eq!(slice.memory_type(), MemoryType::Host);
        assert_eq!(slice.num_elem(), 12);
        Ok(())
    }
}