use std::ffi::c_void;
use crate::driver::context::{get_driver, CudaContext};
use crate::driver::stream::CudaStream;
use crate::driver::sys::CudaDriver;
use crate::GpuError;
use super::buffer::GpuBuffer;
impl<T: Copy> GpuBuffer<T> {
/// Uploads `data` from host memory into this device buffer via `cuMemcpyHtoD`.
///
/// The host slice must contain exactly as many elements as the buffer.
/// An empty buffer returns `Ok(())` without touching the driver.
///
/// # Errors
///
/// Returns [`GpuError::Transfer`] when the lengths differ or when the driver
/// call reports a failure. Failures from `ensure_context` and `get_driver`
/// are propagated with `?`.
pub fn copy_from_host(&mut self, data: &[T]) -> Result<(), GpuError> {
    let host_len = data.len();
    if host_len != self.len {
        return Err(GpuError::Transfer(format!(
            "Length mismatch: host {} vs device {}",
            host_len, self.len
        )));
    }
    // Nothing to move for an empty buffer.
    if self.len == 0 {
        return Ok(());
    }

    self.ensure_context()?;
    let drv = get_driver()?;
    let byte_count = self.size_bytes();
    let host_src = data.as_ptr() as *const c_void;

    // SAFETY: the length check above guarantees `data` holds exactly `self.len`
    // elements. `self.ptr` / `self.size_bytes()` are assumed to describe a live
    // device allocation of that many elements (invariant maintained outside this file).
    let status = unsafe { (drv.cuMemcpyHtoD)(self.ptr, host_src, byte_count) };
    CudaDriver::check(status).map_err(|err| GpuError::Transfer(err.to_string()))
}
/// Downloads the leading `data.len()` elements of this device buffer into `data`
/// via `cuMemcpyDtoH`.
///
/// `data` may be shorter than the device buffer, in which case only that many
/// elements (starting at the beginning of the buffer) are copied. An empty
/// `data` returns `Ok(())` without touching the driver.
///
/// # Errors
///
/// Returns [`GpuError::Transfer`] when `data` is longer than the device buffer
/// or when the driver call reports a failure. Failures from `ensure_context`
/// and `get_driver` are propagated with `?`.
pub fn copy_to_host(&self, data: &mut [T]) -> Result<(), GpuError> {
    let host_len = data.len();
    if host_len > self.len {
        return Err(GpuError::Transfer(format!(
            "Host buffer too large: host {} > device {}",
            host_len, self.len
        )));
    }
    if host_len == 0 {
        return Ok(());
    }

    self.ensure_context()?;
    let drv = get_driver()?;
    // Byte length of the host slice, i.e. `data.len() * size_of::<T>()`.
    let byte_count = std::mem::size_of_val(data);
    let host_dst = data.as_mut_ptr() as *mut c_void;

    // SAFETY: `byte_count` is exactly the size of `data`, and the check above
    // ensures it does not exceed `self.len` elements. `self.ptr` is assumed to be
    // a live device allocation of `self.len` elements (invariant kept outside this file).
    let status = unsafe { (drv.cuMemcpyDtoH)(host_dst, self.ptr, byte_count) };
    CudaDriver::check(status).map_err(|err| GpuError::Transfer(err.to_string()))
}
/// Enqueues an upload of `data` into this device buffer on `stream` via
/// `cuMemcpyHtoDAsync`.
///
/// The host slice must contain exactly as many elements as the buffer.
/// An empty buffer returns `Ok(())` without touching the driver.
///
/// # Safety
///
/// A raw pointer into `data` is handed to the driver and this function does not
/// synchronize `stream` before returning. The caller is presumably required to
/// keep `data` alive and unmodified until the stream has finished the copy.
/// NOTE(review): confirm the exact requirement against the CUDA driver documentation.
///
/// # Errors
///
/// Returns [`GpuError::Transfer`] when the lengths differ or when the driver
/// call reports a failure. Failures from `ensure_context` and `get_driver`
/// are propagated with `?`.
pub unsafe fn copy_from_host_async(
    &mut self,
    data: &[T],
    stream: &CudaStream,
) -> Result<(), GpuError> {
    let host_len = data.len();
    if host_len != self.len {
        return Err(GpuError::Transfer(format!(
            "Length mismatch: host {} vs device {}",
            host_len, self.len
        )));
    }
    if self.len == 0 {
        return Ok(());
    }

    self.ensure_context()?;
    let drv = get_driver()?;
    let byte_count = self.size_bytes();
    let host_src = data.as_ptr() as *const c_void;

    // SAFETY: lengths were verified equal above; the lifetime of `data` beyond
    // this call is the caller's obligation (see `# Safety`).
    let status =
        unsafe { (drv.cuMemcpyHtoDAsync)(self.ptr, host_src, byte_count, stream.raw()) };
    CudaDriver::check(status).map_err(|err| GpuError::Transfer(err.to_string()))
}
/// Enqueues a download of this device buffer into `data` on `stream` via
/// `cuMemcpyDtoHAsync`.
///
/// The host slice must contain exactly as many elements as the buffer.
/// An empty buffer returns `Ok(())` without touching the driver.
///
/// # Safety
///
/// A raw pointer into `data` is handed to the driver and this function does not
/// synchronize `stream` before returning. The caller is presumably required to
/// keep `data` alive, and to avoid reading or writing it, until the stream has
/// finished the copy.
/// NOTE(review): confirm the exact requirement against the CUDA driver documentation.
///
/// # Errors
///
/// Returns [`GpuError::Transfer`] when the lengths differ or when the driver
/// call reports a failure. Failures from `ensure_context` and `get_driver`
/// are propagated with `?`.
pub unsafe fn copy_to_host_async(
    &self,
    data: &mut [T],
    stream: &CudaStream,
) -> Result<(), GpuError> {
    let host_len = data.len();
    if host_len != self.len {
        return Err(GpuError::Transfer(format!(
            "Length mismatch: host {} vs device {}",
            host_len, self.len
        )));
    }
    if self.len == 0 {
        return Ok(());
    }

    self.ensure_context()?;
    let drv = get_driver()?;
    let host_dst = data.as_mut_ptr() as *mut c_void;
    let byte_count = self.size_bytes();

    // SAFETY: lengths were verified equal above; the lifetime of `data` beyond
    // this call is the caller's obligation (see `# Safety`).
    let status =
        unsafe { (drv.cuMemcpyDtoHAsync)(host_dst, self.ptr, byte_count, stream.raw()) };
    CudaDriver::check(status).map_err(|err| GpuError::Transfer(err.to_string()))
}
/// Creates a buffer of `data.len()` elements with `Self::new` and fills it
/// from `data` using `copy_from_host`.
///
/// # Errors
///
/// Propagates any failure from `Self::new` or `copy_from_host`.
pub fn from_host(ctx: &CudaContext, data: &[T]) -> Result<Self, GpuError> {
    let mut uploaded = Self::new(ctx, data.len())?;
    // Hand the buffer back only if the upload succeeded.
    uploaded.copy_from_host(data).map(|()| uploaded)
}
/// Uploads `data` into this device buffer starting at element index `offset`,
/// via `cuMemcpyHtoD`.
///
/// The destination range `offset .. offset + data.len()` must lie inside the
/// buffer. An empty `data` (with an in-range `offset`) returns `Ok(())` without
/// touching the driver.
///
/// # Errors
///
/// Returns [`GpuError::Transfer`] when the destination range falls outside the
/// buffer (including when `offset + data.len()` overflows `usize`) or when the
/// driver call reports a failure. Failures from `ensure_context` and
/// `get_driver` are propagated with `?`.
pub fn copy_from_host_at(&mut self, data: &[T], offset: usize) -> Result<(), GpuError> {
    // Use `checked_add`: a plain `offset + data.len()` wraps in release builds,
    // which would let a huge `offset` pass the bounds test and address memory
    // outside this buffer (and would panic in debug builds instead of returning Err).
    let in_bounds = matches!(offset.checked_add(data.len()), Some(end) if end <= self.len);
    if !in_bounds {
        return Err(GpuError::Transfer(format!(
            "Partial copy out of bounds: offset {} + len {} > buffer {}",
            offset,
            data.len(),
            self.len
        )));
    }
    if data.is_empty() {
        return Ok(());
    }

    self.ensure_context()?;
    let driver = get_driver()?;
    let size = std::mem::size_of_val(data);
    // `offset <= self.len` after the check above, so the byte offset stays within
    // the buffer's own byte size (assuming that size fits in `usize`).
    let dst_ptr = self.ptr + (offset * std::mem::size_of::<T>()) as u64;

    // SAFETY: the destination range was bounds-checked against `self.len` and
    // `size` is exactly the byte length of `data`. `self.ptr` is assumed to be a
    // live device allocation of `self.len` elements (invariant kept outside this file).
    let result =
        unsafe { (driver.cuMemcpyHtoD)(dst_ptr, data.as_ptr() as *const c_void, size) };
    CudaDriver::check(result).map_err(|e| GpuError::Transfer(e.to_string()))
}
/// Downloads `data.len()` elements of this device buffer, starting at element
/// index `offset`, into `data` via `cuMemcpyDtoH`.
///
/// The source range `offset .. offset + data.len()` must lie inside the buffer.
/// An empty `data` (with an in-range `offset`) returns `Ok(())` without touching
/// the driver.
///
/// # Errors
///
/// Returns [`GpuError::Transfer`] when the source range falls outside the
/// buffer (including when `offset + data.len()` overflows `usize`) or when the
/// driver call reports a failure. Failures from `ensure_context` and
/// `get_driver` are propagated with `?`.
pub fn copy_to_host_at(&self, data: &mut [T], offset: usize) -> Result<(), GpuError> {
    // Use `checked_add`: a plain `offset + data.len()` wraps in release builds,
    // which would let a huge `offset` pass the bounds test and read memory
    // outside this buffer (and would panic in debug builds instead of returning Err).
    let in_bounds = matches!(offset.checked_add(data.len()), Some(end) if end <= self.len);
    if !in_bounds {
        return Err(GpuError::Transfer(format!(
            "Partial copy out of bounds: offset {} + len {} > buffer {}",
            offset,
            data.len(),
            self.len
        )));
    }
    if data.is_empty() {
        return Ok(());
    }

    self.ensure_context()?;
    let driver = get_driver()?;
    let size = std::mem::size_of_val(data);
    // `offset <= self.len` after the check above, so the byte offset stays within
    // the buffer's own byte size (assuming that size fits in `usize`).
    let src_ptr = self.ptr + (offset * std::mem::size_of::<T>()) as u64;

    // SAFETY: the source range was bounds-checked against `self.len` and `size`
    // is exactly the byte length of `data`. `self.ptr` is assumed to be a live
    // device allocation of `self.len` elements (invariant kept outside this file).
    let result =
        unsafe { (driver.cuMemcpyDtoH)(data.as_mut_ptr() as *mut c_void, src_ptr, size) };
    CudaDriver::check(result).map_err(|e| GpuError::Transfer(e.to_string()))
}
/// Creates a new buffer of the same length with `GpuBuffer::new(ctx, ..)` and
/// copies this buffer's contents into it with `copy_from_buffer`.
///
/// This is an inherent method taking a context argument, not an implementation
/// of the `Clone` trait.
///
/// # Errors
///
/// Propagates any failure from creating the buffer or from the copy.
pub fn clone(&self, ctx: &CudaContext) -> Result<Self, GpuError> {
    let mut duplicate = GpuBuffer::new(ctx, self.len)?;
    match duplicate.copy_from_buffer(self) {
        Ok(()) => Ok(duplicate),
        Err(err) => Err(err),
    }
}
/// Copies the full contents of `src` into this buffer via `cuMemcpyDtoD`.
///
/// Both buffers must have the same element count. Empty buffers return
/// `Ok(())` without touching the driver.
///
/// # Errors
///
/// Returns [`GpuError::Transfer`] when the lengths differ or when the driver
/// call reports a failure. Failures from `ensure_context` and `get_driver`
/// are propagated with `?`.
pub fn copy_from_buffer(&mut self, src: &GpuBuffer<T>) -> Result<(), GpuError> {
    let (src_len, dst_len) = (src.len, self.len);
    if src_len != dst_len {
        return Err(GpuError::Transfer(format!(
            "PAR-023: D2D length mismatch: src {} vs dst {}",
            src_len, dst_len
        )));
    }
    if dst_len == 0 {
        return Ok(());
    }

    self.ensure_context()?;
    let drv = get_driver()?;
    let byte_count = self.size_bytes();

    // SAFETY: both buffers were verified to have the same length; their `ptr`
    // fields are assumed to be live device allocations of that length
    // (invariant kept outside this file).
    let status = unsafe { (drv.cuMemcpyDtoD)(self.ptr, src.ptr, byte_count) };
    CudaDriver::check(status).map_err(|err| GpuError::Transfer(err.to_string()))
}
/// Copies `count` elements from `src` (starting at `src_offset`) into this
/// buffer (starting at `dst_offset`) via `cuMemcpyDtoD`.
///
/// Both ranges must lie inside their respective buffers. A `count` of zero
/// (with in-range offsets) returns `Ok(())` without touching the driver.
///
/// # Errors
///
/// Returns [`GpuError::Transfer`] when either range falls outside its buffer
/// (including when `offset + count` overflows `usize`) or when the driver call
/// reports a failure. Failures from `ensure_context` and `get_driver` are
/// propagated with `?`.
pub fn copy_from_buffer_at(
    &mut self,
    src: &GpuBuffer<T>,
    dst_offset: usize,
    src_offset: usize,
    count: usize,
) -> Result<(), GpuError> {
    // Range ends are computed with `checked_add`: a plain `offset + count` wraps
    // in release builds, which would let out-of-range arguments pass the bounds
    // tests (and would panic in debug builds instead of returning Err).
    let dst_fits = matches!(dst_offset.checked_add(count), Some(end) if end <= self.len);
    if !dst_fits {
        return Err(GpuError::Transfer(format!(
            "PAR-023: D2D dst out of bounds: {} + {} > {}",
            dst_offset, count, self.len
        )));
    }
    let src_fits = matches!(src_offset.checked_add(count), Some(end) if end <= src.len);
    if !src_fits {
        return Err(GpuError::Transfer(format!(
            "PAR-023: D2D src out of bounds: {} + {} > {}",
            src_offset, count, src.len
        )));
    }
    if count == 0 {
        return Ok(());
    }

    self.ensure_context()?;
    let driver = get_driver()?;
    let elem_size = std::mem::size_of::<T>();
    // All three products are bounded by a buffer's own byte size after the
    // checks above (assuming that size fits in `usize`).
    let size = count * elem_size;
    let dst_ptr = self.ptr + (dst_offset * elem_size) as u64;
    let src_ptr = src.ptr + (src_offset * elem_size) as u64;

    // SAFETY: both ranges were bounds-checked against `self.len` / `src.len`.
    // The `ptr` fields are assumed to be live device allocations of those
    // lengths (invariant kept outside this file).
    let result = unsafe { (driver.cuMemcpyDtoD)(dst_ptr, src_ptr, size) };
    CudaDriver::check(result).map_err(|e| GpuError::Transfer(e.to_string()))
}
/// Enqueues a copy of the full contents of `src` into this buffer on `stream`
/// via `cuMemcpyDtoDAsync`.
///
/// Both buffers must have the same element count. Empty buffers return
/// `Ok(())` without touching the driver.
///
/// # Safety
///
/// This function does not synchronize `stream` before returning. The caller is
/// presumably required to keep both buffers alive, and to order any other use
/// of them correctly with respect to `stream`, until the copy has completed.
/// NOTE(review): confirm the exact requirement against the CUDA driver documentation.
///
/// # Errors
///
/// Returns [`GpuError::Transfer`] when the lengths differ or when the driver
/// call reports a failure. Failures from `ensure_context` and `get_driver`
/// are propagated with `?`.
pub unsafe fn copy_from_buffer_async(
    &mut self,
    src: &GpuBuffer<T>,
    stream: &CudaStream,
) -> Result<(), GpuError> {
    let (src_len, dst_len) = (src.len, self.len);
    if src_len != dst_len {
        return Err(GpuError::Transfer(format!(
            "PAR-023: Async D2D length mismatch: src {} vs dst {}",
            src_len, dst_len
        )));
    }
    if dst_len == 0 {
        return Ok(());
    }

    self.ensure_context()?;
    let drv = get_driver()?;
    let byte_count = self.size_bytes();

    // SAFETY: both buffers were verified to have the same length; keeping them
    // valid until the stream finishes is the caller's obligation (see `# Safety`).
    let status =
        unsafe { (drv.cuMemcpyDtoDAsync)(self.ptr, src.ptr, byte_count, stream.raw()) };
    CudaDriver::check(status).map_err(|err| GpuError::Transfer(err.to_string()))
}
/// Enqueues a copy of `count` elements from `src` (starting at `src_offset`)
/// into this buffer (starting at `dst_offset`) on `stream` via
/// `cuMemcpyDtoDAsync`.
///
/// Both ranges must lie inside their respective buffers. A `count` of zero
/// (with in-range offsets) returns `Ok(())` without touching the driver.
///
/// # Safety
///
/// This function does not synchronize `stream` before returning. The caller is
/// presumably required to keep both buffers alive, and to order any other use
/// of them correctly with respect to `stream`, until the copy has completed.
/// NOTE(review): confirm the exact requirement against the CUDA driver documentation.
///
/// # Errors
///
/// Returns [`GpuError::Transfer`] when either range falls outside its buffer
/// (including when `offset + count` overflows `usize`) or when the driver call
/// reports a failure. Failures from `ensure_context` and `get_driver` are
/// propagated with `?`.
pub unsafe fn copy_from_buffer_at_async(
    &mut self,
    src: &GpuBuffer<T>,
    dst_offset: usize,
    src_offset: usize,
    count: usize,
    stream: &CudaStream,
) -> Result<(), GpuError> {
    // Range ends are computed with `checked_add`: a plain `offset + count` wraps
    // in release builds, which would let out-of-range arguments pass the bounds
    // tests (and would panic in debug builds instead of returning Err).
    let dst_fits = matches!(dst_offset.checked_add(count), Some(end) if end <= self.len);
    if !dst_fits {
        return Err(GpuError::Transfer(format!(
            "PAR-023: Async D2D dst out of bounds: {} + {} > {}",
            dst_offset, count, self.len
        )));
    }
    let src_fits = matches!(src_offset.checked_add(count), Some(end) if end <= src.len);
    if !src_fits {
        return Err(GpuError::Transfer(format!(
            "PAR-023: Async D2D src out of bounds: {} + {} > {}",
            src_offset, count, src.len
        )));
    }
    if count == 0 {
        return Ok(());
    }

    self.ensure_context()?;
    let driver = get_driver()?;
    let elem_size = std::mem::size_of::<T>();
    // All three products are bounded by a buffer's own byte size after the
    // checks above (assuming that size fits in `usize`).
    let size = count * elem_size;
    let dst_ptr = self.ptr + (dst_offset * elem_size) as u64;
    let src_ptr = src.ptr + (src_offset * elem_size) as u64;

    // SAFETY: both ranges were bounds-checked against `self.len` / `src.len`;
    // keeping the buffers valid until the stream finishes is the caller's
    // obligation (see `# Safety`).
    let result = unsafe { (driver.cuMemcpyDtoDAsync)(dst_ptr, src_ptr, size, stream.raw()) };
    CudaDriver::check(result).map_err(|e| GpuError::Transfer(e.to_string()))
}
/// Enqueues a copy of `count` elements from `src` (starting at `src_offset`)
/// into this buffer (starting at `dst_offset`) via `cuMemcpyDtoDAsync`, using a
/// raw `CUstream` handle instead of a `CudaStream` wrapper.
///
/// Both ranges must lie inside their respective buffers. A `count` of zero
/// (with in-range offsets) returns `Ok(())` without touching the driver.
///
/// # Safety
///
/// `stream_handle` is passed to the driver unchecked, so the caller must supply
/// a handle the driver accepts. This function does not synchronize the stream
/// before returning; the caller is presumably required to keep both buffers
/// alive, and to order any other use of them correctly with respect to that
/// stream, until the copy has completed.
/// NOTE(review): confirm the exact requirements against the CUDA driver documentation.
///
/// # Errors
///
/// Returns [`GpuError::Transfer`] when either range falls outside its buffer
/// (including when `offset + count` overflows `usize`) or when the driver call
/// reports a failure. Failures from `ensure_context` and `get_driver` are
/// propagated with `?`.
pub unsafe fn copy_from_buffer_at_async_raw(
    &mut self,
    src: &GpuBuffer<T>,
    dst_offset: usize,
    src_offset: usize,
    count: usize,
    stream_handle: crate::driver::sys::CUstream,
) -> Result<(), GpuError> {
    // Range ends are computed with `checked_add`: a plain `offset + count` wraps
    // in release builds, which would let out-of-range arguments pass the bounds
    // tests (and would panic in debug builds instead of returning Err).
    let dst_fits = matches!(dst_offset.checked_add(count), Some(end) if end <= self.len);
    if !dst_fits {
        return Err(GpuError::Transfer(format!(
            "PAR-023: Async D2D dst out of bounds: {} + {} > {}",
            dst_offset, count, self.len
        )));
    }
    let src_fits = matches!(src_offset.checked_add(count), Some(end) if end <= src.len);
    if !src_fits {
        return Err(GpuError::Transfer(format!(
            "PAR-023: Async D2D src out of bounds: {} + {} > {}",
            src_offset, count, src.len
        )));
    }
    if count == 0 {
        return Ok(());
    }

    self.ensure_context()?;
    let driver = get_driver()?;
    let elem_size = std::mem::size_of::<T>();
    // All three products are bounded by a buffer's own byte size after the
    // checks above (assuming that size fits in `usize`).
    let size = count * elem_size;
    let dst_ptr = self.ptr + (dst_offset * elem_size) as u64;
    let src_ptr = src.ptr + (src_offset * elem_size) as u64;

    // SAFETY: both ranges were bounds-checked against `self.len` / `src.len`;
    // the validity of `stream_handle` and of the buffers until the stream
    // finishes is the caller's obligation (see `# Safety`).
    let result = unsafe { (driver.cuMemcpyDtoDAsync)(dst_ptr, src_ptr, size, stream_handle) };
    CudaDriver::check(result).map_err(|e| GpuError::Transfer(e.to_string()))
}
}