pub async fn batch_flash_attention_compute(
queries: Vec<Float32Array>,
keys: Vec<Vec<Float32Array>>,
values: Vec<Vec<Float32Array>>,
dim: u32,
block_size: u32,
) -> Result<BatchResult>
Process a batch with flash attention