pub fn dispatch_copy_f32(
encoder: &mut CommandEncoder,
registry: &mut KernelRegistry,
device: &DeviceRef,
src: &MlxBuffer,
dst: &MlxBuffer,
src_offset: usize,
dst_offset: usize,
count: usize,
) -> Result<()>
Copy `count` `f32` elements from `src[src_offset..]` to `dst[dst_offset..]`.
Used during prefill to scatter/gather rows between large prefill buffers and single-token activation buffers.