pub struct FlashAttnVecParams {
pub num_heads: u32,
pub num_kv_heads: u32,
pub head_dim: u32,
pub kv_seq_len: u32,
pub kv_capacity: u32,
pub scale: f32,
pub mask_type: u32,
pub sliding_window: u32,
pub softcap: f32,
}
Expand description
Parameters for the flash attention vector kernel.
Fields§
num_heads: u32 — Number of query attention heads.
num_kv_heads: u32 — Number of key/value attention heads (GQA: may be < num_heads).
head_dim: u32 — Dimension of each attention head (256 or 512).
kv_seq_len: u32 — Current KV sequence length (number of valid positions).
kv_capacity: u32 — KV cache capacity (stride between KV heads in positions).
scale: f32 — Attention score scaling factor (e.g. 1/sqrt(head_dim) or 1.0).
mask_type: u32 — Mask type: 0=none, 1=causal, 2=sliding_window.
sliding_window: u32 — Sliding window size (only used when mask_type == 2).
softcap: f32 — Logit softcapping (0 = disabled).
Trait Implementations§
Source§impl Clone for FlashAttnVecParams
impl Clone for FlashAttnVecParams
Source§fn clone(&self) -> FlashAttnVecParams
fn clone(&self) -> FlashAttnVecParams
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for FlashAttnVecParams
impl Debug for FlashAttnVecParams
impl Copy for FlashAttnVecParams
Auto Trait Implementations§
impl Freeze for FlashAttnVecParams
impl RefUnwindSafe for FlashAttnVecParams
impl Send for FlashAttnVecParams
impl Sync for FlashAttnVecParams
impl Unpin for FlashAttnVecParams
impl UnsafeUnpin for FlashAttnVecParams
impl UnwindSafe for FlashAttnVecParams
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more