Struct MultiHeadAttention

Source

pub struct MultiHeadAttention { /* private fields */ }

Expand description

Multi-Head Attention (Vaswani et al., 2017).

Allows the model to jointly attend to information from different representation subspaces at different positions.

§Example

let mha = MultiHeadAttention::new(512, 8);  // embed_dim=512, num_heads=8
let q = Tensor::randn(&[32, 10, 512]);
let k = Tensor::randn(&[32, 20, 512]);
let v = Tensor::randn(&[32, 20, 512]);
let (output, attn_weights) = mha.forward_qkv(&q, &k, &v, None);

Implementations§

Source §

impl MultiHeadAttention

Source

pub fn new(embed_dim: usize, num_heads: usize) -> Self

Create a new Multi-Head Attention layer.

§Arguments

embed_dim - Total dimension of the model (must be divisible by num_heads)
num_heads - Number of attention heads

§Panics

Panics if embed_dim is not divisible by num_heads.

Source

pub fn with_dropout(self, dropout_p: f32) -> Self

Set dropout probability.

Source

pub fn forward_qkv( &self, query: &Tensor, key: &Tensor, value: &Tensor, attn_mask: Option<&Tensor>, ) -> (Tensor, Tensor)

Forward pass with separate query, key, value inputs.

§Arguments

query - Query tensor [batch, target_len, embed_dim]
key - Key tensor [batch, source_len, embed_dim]
value - Value tensor [batch, source_len, embed_dim]
attn_mask - Optional attention mask [batch, target_len, source_len]

§Returns

Tuple of (output, attention_weights)

Source

pub fn forward_self( &self, x: &Tensor, attn_mask: Option<&Tensor>, ) -> (Tensor, Tensor)

Self-attention: query, key, value are the same.

Source

pub fn embed_dim(&self) -> usize

Get embed_dim.

Source

pub fn num_heads(&self) -> usize

Get num_heads.

Trait Implementations§

Source §

impl Debug for MultiHeadAttention

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Source §

impl Module for MultiHeadAttention

Source §

fn forward(&self, input: &Tensor) -> Tensor

Perform forward computation. Read more

Source §

fn parameters(&self) -> Vec<&Tensor>

Get references to all learnable parameters. Read more

Source §

fn parameters_mut(&mut self) -> Vec<&mut Tensor>

Get mutable references to all learnable parameters. Read more

Source §

fn train(&mut self)

Set the module to training mode. Read more

Source §

fn eval(&mut self)

Set the module to evaluation mode. Read more

Source §

fn training(&self) -> bool

Check if the module is in training mode.

Source §

fn refresh_caches(&mut self)

Refresh any cached computations after parameters have been modified. Read more

Source §

fn zero_grad(&mut self)

Zero out gradients for all parameters. Read more

Source §

fn num_parameters(&self) -> usize

Get the number of learnable parameters.

Auto Trait Implementations§

§

impl !UnwindSafe for MultiHeadAttention

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> IntoEither for T

Source §

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source §

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source §

impl<T> Pointable for T

Source §

const ALIGN: usize

The alignment of the pointer.

Source §

type Init = T

The type for initializers.

Source §

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more

Source §

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more

Source §

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more

Source §

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source §

Struct MultiHeadAttention Copy item path

§Example

Implementations§

impl MultiHeadAttention

pub fn new(embed_dim: usize, num_heads: usize) -> Self

§Arguments

§Panics

pub fn with_dropout(self, dropout_p: f32) -> Self

pub fn forward_qkv( &self, query: &Tensor, key: &Tensor, value: &Tensor, attn_mask: Option<&Tensor>, ) -> (Tensor, Tensor)

§Arguments

§Returns

pub fn forward_self( &self, x: &Tensor, attn_mask: Option<&Tensor>, ) -> (Tensor, Tensor)

pub fn embed_dim(&self) -> usize

pub fn num_heads(&self) -> usize

Trait Implementations§

impl Debug for MultiHeadAttention

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Module for MultiHeadAttention

fn forward(&self, input: &Tensor) -> Tensor

fn parameters(&self) -> Vec<&Tensor>

fn parameters_mut(&mut self) -> Vec<&mut Tensor>

fn train(&mut self)

fn eval(&mut self)

fn training(&self) -> bool

fn refresh_caches(&mut self)

fn zero_grad(&mut self)

fn num_parameters(&self) -> usize

Auto Trait Implementations§

impl Freeze for MultiHeadAttention

impl !RefUnwindSafe for MultiHeadAttention

impl Send for MultiHeadAttention

impl Sync for MultiHeadAttention

impl Unpin for MultiHeadAttention

impl UnsafeUnpin for MultiHeadAttention

impl !UnwindSafe for MultiHeadAttention

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

Struct MultiHeadAttention

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,