pub fn create_causal_mask(seq_len: usize, device: &Device) -> Result<Tensor>
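Creates a causal mask for attention, sized by `seq_len` and created for the given `device`; returns the mask tensor wrapped in a `Result`.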