Struct FlashAttention

Source

pub struct FlashAttention { /* private fields */ }

Expand description

Block-wise attention computation optimized for CPU cache locality.

Instead of materializing the full N×N attention matrix, processes the computation in blocks that fit in L1/L2 cache, achieving O(N) memory complexity instead of O(N²).

Implementations§

Source §

impl FlashAttention

Source

pub fn new(config: FlashAttentionConfig) -> Self

Create a new FlashAttention with the given configuration.

Source

pub fn with_dimensions(dimensions: usize) -> Self

Create a FlashAttention for the given dimensions, using the default configuration.

Source

pub fn config(&self) -> &FlashAttentionConfig

Returns a reference to the configuration.

Source

pub fn attention( &self, queries: &[Vec<f32>], keys: &[Vec<f32>], values: &[Vec<f32>], ) -> Vec<Vec<f32>>

Compute scaled dot-product attention using the block-wise algorithm.

For sequences of length N with dimension D:

Naive: O(N²) memory (full attention matrix)
Flash: O(N) memory (block-wise accumulation via online softmax)

§Arguments

queries - Query vectors [N_q × D]
keys - Key vectors [N_k × D]
values - Value vectors [N_k × D]

§Returns

Output vectors [N_q × D]

Source

pub fn naive_attention( &self, queries: &[Vec<f32>], keys: &[Vec<f32>], values: &[Vec<f32>], ) -> Vec<Vec<f32>>

Naive attention implementation for benchmarking comparison.

Materializes the full N×N attention matrix: O(N²) memory.

Source

pub fn benchmark(&self, num_vectors: usize) -> BenchmarkResult

Run a benchmark comparing naive vs flash attention.

Generates random vectors and measures wall-clock time for both methods. Also verifies that both implementations produce equivalent results.

Source

pub fn self_attention(&self, sequence: &[Vec<f32>]) -> Vec<Vec<f32>>

Compute self-attention: a sequence attends to itself.

Convenience wrapper around attention(sequence, sequence, sequence): the same sequence is used as queries, keys, and values.

Source

pub fn cross_attention( &self, queries: &[Vec<f32>], kv_sequence: &[Vec<f32>], ) -> Vec<Vec<f32>>

Compute cross-attention between two sequences.

Queries from one sequence attend to keys/values from another.

Source

pub fn memory_estimate(&self, seq_len: usize) -> MemoryEstimate

Estimate peak memory usage in bytes for a given sequence length.

Trait Implementations§

Source §

impl Debug for FlashAttention

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

§

impl UnwindSafe for FlashAttention

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
where ST: ?Sized, DT: ?Sized,

Source §

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
where ST: ?Sized, DT: ?Sized,

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> Read<Exclusive, BecauseExclusive> for T
where T: ?Sized,

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<T> WithSubscriber for T

Source §

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more

Struct FlashAttention

Implementations§

impl FlashAttention

pub fn new(config: FlashAttentionConfig) -> Self

pub fn with_dimensions(dimensions: usize) -> Self

pub fn config(&self) -> &FlashAttentionConfig

pub fn attention( &self, queries: &[Vec<f32>], keys: &[Vec<f32>], values: &[Vec<f32>], ) -> Vec<Vec<f32>>

§Arguments

§Returns

pub fn naive_attention( &self, queries: &[Vec<f32>], keys: &[Vec<f32>], values: &[Vec<f32>], ) -> Vec<Vec<f32>>

pub fn benchmark(&self, num_vectors: usize) -> BenchmarkResult

pub fn self_attention(&self, sequence: &[Vec<f32>]) -> Vec<Vec<f32>>

pub fn cross_attention( &self, queries: &[Vec<f32>], kv_sequence: &[Vec<f32>], ) -> Vec<Vec<f32>>

pub fn memory_estimate(&self, seq_len: usize) -> MemoryEstimate

Trait Implementations§

impl Debug for FlashAttention

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Auto Trait Implementations§

impl Freeze for FlashAttention

impl RefUnwindSafe for FlashAttention

impl Send for FlashAttention

impl Sync for FlashAttention

impl Unpin for FlashAttention

impl UnsafeUnpin for FlashAttention

impl UnwindSafe for FlashAttention

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT where ST: ?Sized, DT: ?Sized,

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> Read<Exclusive, BecauseExclusive> for T where T: ?Sized,

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct FlashAttention

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
where ST: ?Sized, DT: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> Read<Exclusive, BecauseExclusive> for T
where T: ?Sized,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,