use cudarc::driver::sys;
use xlog_core::{Result, XlogError};
use crate::device_runtime::StreamId;
use crate::launch::LaunchRecorder;
use crate::memory::{CudaColumn, TrackedCudaSlice};
impl super::CudaKernelProvider {
/// Enqueues an asynchronous byte fill of `dst` with `value` on `launch_stream`,
/// bracketing the driver call with a strict [`LaunchRecorder`] that has `dst`
/// registered as a write.
///
/// The sequence is: look up the runtime and the stream, read the destination
/// address and length, run the recorder's preflight, issue `cuMemsetD8Async`,
/// then commit the recorder.
///
/// # Errors
///
/// Returns `XlogError::Kernel` when:
/// - the memory manager has no runtime attached,
/// - `launch_stream` does not resolve in the runtime's stream pool,
/// - the recorder's preflight or commit reports an error, or
/// - `cuMemsetD8Async` returns anything other than `CUDA_SUCCESS`.
pub fn memset_recorded(
    &self,
    dst: &mut TrackedCudaSlice<u8>,
    value: u8,
    launch_stream: StreamId,
) -> Result<()> {
    let rt = match self.memory().runtime() {
        Some(rt) => rt,
        None => {
            return Err(XlogError::Kernel(
                "memset_recorded requires a runtime-backed GpuMemoryManager \
                 (constructed via with_runtime)"
                    .to_string(),
            ));
        }
    };

    // Keep the pool bound to a local so the resolved stream can borrow from it.
    let streams = rt.stream_pool();
    let stream = match streams.resolve(launch_stream) {
        Some(stream) => stream,
        None => {
            return Err(XlogError::Kernel(format!(
                "memset_recorded: launch_stream StreamId({}) does not resolve",
                launch_stream.0
            )));
        }
    };

    // Read the raw address and length before `dst` is handed to the recorder.
    let device_addr = dst.device_ptr_value();
    let byte_count = dst.len();

    let mut recorder = LaunchRecorder::new_strict(launch_stream);
    recorder.write(dst);
    if let Err(err) = recorder.preflight(rt) {
        return Err(XlogError::Kernel(format!(
            "memset_recorded: launch recorder preflight failed: {}",
            err
        )));
    }

    // SAFETY: `device_addr` and `byte_count` were read from `dst`, which this
    // call holds by exclusive reference, and the stream handle was resolved
    // from the runtime's stream pool above.
    // NOTE(review): assumes the allocation outlives the enqueued fill —
    // confirm the recorder/runtime guarantees that.
    let status = unsafe {
        sys::cuMemsetD8Async(device_addr, value, byte_count, stream.cu_stream())
    };
    if status != sys::cudaError_enum::CUDA_SUCCESS {
        return Err(XlogError::Kernel(format!(
            "cuMemsetD8Async failed: {:?}",
            status
        )));
    }

    if let Err(err) = recorder.commit(rt) {
        return Err(XlogError::Kernel(format!(
            "memset_recorded: launch recorder commit failed: {}",
            err
        )));
    }
    Ok(())
}
/// Column variant of the recorded memset: enqueues an asynchronous byte fill
/// of the `CudaColumn` `dst` with `value` on `launch_stream`, bracketing the
/// driver call with a strict [`LaunchRecorder`] that has `dst` registered via
/// `write_column`.
///
/// The fill length is the column's length as seen through its
/// `DeviceSlice<u8>` implementation.
///
/// # Errors
///
/// Returns `XlogError::Kernel` when:
/// - the memory manager has no runtime attached,
/// - `launch_stream` does not resolve in the runtime's stream pool,
/// - the recorder's preflight or commit reports an error, or
/// - `cuMemsetD8Async` returns anything other than `CUDA_SUCCESS`.
pub fn memset_column_recorded(
    &self,
    dst: &mut CudaColumn,
    value: u8,
    launch_stream: StreamId,
) -> Result<()> {
    let rt = match self.memory().runtime() {
        Some(rt) => rt,
        None => {
            return Err(XlogError::Kernel(
                "memset_column_recorded requires a runtime-backed GpuMemoryManager".to_string(),
            ));
        }
    };

    // Keep the pool bound to a local so the resolved stream can borrow from it.
    let streams = rt.stream_pool();
    let stream = match streams.resolve(launch_stream) {
        Some(stream) => stream,
        None => {
            return Err(XlogError::Kernel(format!(
                "memset_column_recorded: launch_stream StreamId({}) does not resolve",
                launch_stream.0
            )));
        }
    };

    // Read the raw address and length before `dst` is handed to the recorder.
    let device_addr = *dst.device_ptr();
    let byte_count = <CudaColumn as cudarc::driver::DeviceSlice<u8>>::len(dst);

    let mut recorder = LaunchRecorder::new_strict(launch_stream);
    recorder.write_column(dst);
    if let Err(err) = recorder.preflight(rt) {
        return Err(XlogError::Kernel(format!(
            "memset_column_recorded: launch recorder preflight failed: {}",
            err
        )));
    }

    // SAFETY: `device_addr` and `byte_count` were read from `dst`, which this
    // call holds by exclusive reference, and the stream handle was resolved
    // from the runtime's stream pool above.
    // NOTE(review): assumes the column's allocation outlives the enqueued
    // fill — confirm the recorder/runtime guarantees that.
    let status = unsafe {
        sys::cuMemsetD8Async(device_addr, value, byte_count, stream.cu_stream())
    };
    if status != sys::cudaError_enum::CUDA_SUCCESS {
        return Err(XlogError::Kernel(format!(
            "cuMemsetD8Async (column) failed: {:?}",
            status
        )));
    }

    if let Err(err) = recorder.commit(rt) {
        return Err(XlogError::Kernel(format!(
            "memset_column_recorded: launch recorder commit failed: {}",
            err
        )));
    }
    Ok(())
}
}