pub fn memcpy_h2d<T: Copy>(dst: DevicePtr, src: &[T]) -> CudaRtResult<()>
Copies the host slice `src` to the device memory at `dst` (typed helper, no raw pointers).