pub fn memcpy_d2h<T: Copy>(dst: &mut [T], src: DevicePtr) -> CudaRtResult<()>
Copies data from the device pointer `src` into the host slice `dst` (typed helper; no raw pointers needed).