Struct FuseAttentionBlock

Source

pub struct FuseAttentionBlock;

Expand description

Fuses matmul(QKV) → narrow(Q,K,V) → [rope] → attention → matmul(out) into a single FusedAttentionBlock when batch*seq is small.

The optimizer auto-detects batch size from graph input shapes. For small inputs (batch*seq ≤ 64), intermediate tensors fit in L1 cache, making a monolithic kernel faster than separate BLAS calls.

Threshold is configurable via RLX_FUSE_ATTN_THRESHOLD (default: 64).

Trait Implementations§

Source §

impl Pass for FuseAttentionBlock

Source §

fn name(&self) -> &str

Human-readable name for logging.

Source §

fn run(&self, graph: Graph) -> Graph

Transform the graph. Returns a new graph (or the same if no changes).

Auto Trait Implementations§

§

impl UnwindSafe for FuseAttentionBlock

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

FuseAttentionBlock

Struct FuseAttentionBlock

Trait Implementations§

impl Pass for FuseAttentionBlock

fn name(&self) -> &str

fn run(&self, graph: Graph) -> Graph

Auto Trait Implementations§

impl Freeze for FuseAttentionBlock

impl RefUnwindSafe for FuseAttentionBlock

impl Send for FuseAttentionBlock

impl Sync for FuseAttentionBlock

impl Unpin for FuseAttentionBlock

impl UnsafeUnpin for FuseAttentionBlock

impl UnwindSafe for FuseAttentionBlock

Blanket Implementations§

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T
where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

FuseAttentionBlock

Struct FuseAttentionBlock

Trait Implementations§

impl Pass for FuseAttentionBlock

fn name(&self) -> &str

fn run(&self, graph: Graph) -> Graph

Auto Trait Implementations§

impl Freeze for FuseAttentionBlock

impl RefUnwindSafe for FuseAttentionBlock

impl Send for FuseAttentionBlock

impl Sync for FuseAttentionBlock

impl Unpin for FuseAttentionBlock

impl UnsafeUnpin for FuseAttentionBlock

impl UnwindSafe for FuseAttentionBlock

Blanket Implementations§

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T
where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Struct FuseAttentionBlock

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,