pub fn gpu_slice_read(
src: &CudaBuffer<f32>,
n_batch: usize,
d: usize,
len: usize,
max_len: usize,
device: &GpuDevice,
) -> GpuResult<CudaBuffer<f32>>
Reads the first `len` rows from each batch of a `[N, max_len, D]` buffer, producing a `[N, len, D]` buffer.