Struct dfdx::nn::modules::MultiHeadAttention
pub struct MultiHeadAttention<const EMBED_DIM: usize, const NUM_HEADS: usize, const K_DIM: usize, const V_DIM: usize, E: Dtype, D: DeviceStorage> {
pub w_q: Linear<EMBED_DIM, K_DIM, E, D>,
pub w_k: Linear<EMBED_DIM, K_DIM, E, D>,
pub w_v: Linear<EMBED_DIM, V_DIM, E, D>,
pub w_o: Linear<V_DIM, EMBED_DIM, E, D>,
}
A multi-head attention layer.
Generics:
- EMBED_DIM: The size of query vectors.
- NUM_HEADS: The number of heads to split query/key/value into.
- K_DIM (optional): The size of key vectors. Defaults to EMBED_DIM.
- V_DIM (optional): The size of value vectors. Defaults to EMBED_DIM.
PyTorch equivalent: torch.nn.MultiheadAttention(EMBED_DIM, NUM_HEADS, batch_first=True)
Examples
- MultiHeadAttention<8, 2> is an attention layer with 2 heads and 8-dimensional token, key, and value vectors.
- MultiHeadAttention<8, 2, 6, 4> is an attention layer whose key and value dimensions differ from the embed dimension.
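A minimal usage sketch (it assumes the matching builder type exported by the dfdx 0.13 prelude and a CPU device; exact paths may differ across versions):

    use dfdx::prelude::*;

    fn main() {
        let dev = Cpu::default();
        // The builder MultiHeadAttention<8, 2> defaults K_DIM = V_DIM = EMBED_DIM = 8,
        // so this constructs a modules::MultiHeadAttention<8, 2, 8, 8, f32, Cpu>.
        let mha = dev.build_module::<MultiHeadAttention<8, 2>, f32>();

        // Batched self attention, batch-first like the PyTorch equivalent:
        // (batch = 4, seq = 10, embed = 8) in, same shape out.
        let x: Tensor<Rank3<4, 10, 8>, f32, _> = dev.sample_normal();
        let _y = mha.forward(x); // shape: (4, 10, 8)
    }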
Fields
w_q: Linear<EMBED_DIM, K_DIM, E, D>
w_k: Linear<EMBED_DIM, K_DIM, E, D>
w_v: Linear<EMBED_DIM, V_DIM, E, D>
w_o: Linear<V_DIM, EMBED_DIM, E, D>
Trait Implementations
impl<const EMBED_DIM: usize, const NUM_HEADS: usize, const K_DIM: usize, const V_DIM: usize, E: Clone + Dtype, D: Clone + DeviceStorage> Clone for MultiHeadAttention<EMBED_DIM, NUM_HEADS, K_DIM, V_DIM, E, D>
fn clone(&self) -> MultiHeadAttention<EMBED_DIM, NUM_HEADS, K_DIM, V_DIM, E, D>
Returns a copy of the value.
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source.
impl<const EMBED_DIM: usize, const NUM_HEADS: usize, const K_DIM: usize, const V_DIM: usize, E: Debug + Dtype, D: Debug + DeviceStorage> Debug for MultiHeadAttention<EMBED_DIM, NUM_HEADS, K_DIM, V_DIM, E, D>
impl<const M: usize, const H: usize, const K: usize, const V: usize, E, D, B, S1, S2, T> Module<(Tensor<(B, S1, Const<M>), E, D, T>, Tensor<(B, S2, Const<M>), E, D, NoneTape>, Tensor<(B, S2, Const<M>), E, D, NoneTape>)> for MultiHeadAttention<M, H, K, V, E, D>
where
    E: Dtype + Float,
    D: Device<E>,
    B: Dim,
    S1: Dim,
    S2: Dim,
    T: Tape<E, D>,
fn try_forward(
    &self,
    (q, k, v): (Tensor<(B, S1, Const<M>), E, D, T>, Tensor<(B, S2, Const<M>), E, D>, Tensor<(B, S2, Const<M>), E, D>)
) -> Result<Self::Output, D::Err>
Batched encoder-decoder style attention, where one set of tensors supplies the keys and values and another supplies the queries.
type Error = <D as HasErr>::Err
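Continuing the construction sketch above (shapes are illustrative):

    // q: (batch = 4, tgt = 10, embed = 8); k, v: (batch = 4, src = 6, embed = 8).
    let q: Tensor<Rank3<4, 10, 8>, f32, _> = dev.sample_normal();
    let k: Tensor<Rank3<4, 6, 8>, f32, _> = dev.sample_normal();
    let v: Tensor<Rank3<4, 6, 8>, f32, _> = dev.sample_normal();
    // The output sequence length follows the queries: (4, 10, 8).
    let out = mha.forward((q, k, v));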
impl<const M: usize, const H: usize, const K: usize, const V: usize, E, D, S1, S2, T> Module<(Tensor<(S1, Const<M>), E, D, T>, Tensor<(S2, Const<M>), E, D, NoneTape>, Tensor<(S2, Const<M>), E, D, NoneTape>)> for MultiHeadAttention<M, H, K, V, E, D>
where
    E: Dtype + Float,
    D: Device<E>,
    S1: Dim,
    S2: Dim,
    T: Tape<E, D>,
fn try_forward(
    &self,
    (q, k, v): (Tensor<(S1, Const<M>), E, D, T>, Tensor<(S2, Const<M>), E, D>, Tensor<(S2, Const<M>), E, D>)
) -> Result<Self::Output, D::Err>
Unbatched encoder-decoder style attention, where one set of tensors supplies the keys and values and another supplies the queries.
type Error = <D as HasErr>::Err
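The unbatched form works the same way minus the leading batch dimension; try_forward surfaces device errors instead of panicking. A sketch reusing mha and dev from above:

    // q: (tgt = 10, embed = 8); the same encoded sequence serves as both k and v.
    let q: Tensor<Rank2<10, 8>, f32, _> = dev.sample_normal();
    let kv: Tensor<Rank2<6, 8>, f32, _> = dev.sample_normal();
    let out = mha.try_forward((q, kv.clone(), kv)).expect("attention failed"); // (10, 8)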
impl<const M: usize, const H: usize, const K: usize, const V: usize, E, D, Src> Module<Src> for MultiHeadAttention<M, H, K, V, E, D>
where
    E: Dtype,
    D: Device<E>,
    Src: SplitTape,
    Self: Module<(Src, Src::NoTape, Src::NoTape), Output = Src, Error = D::Err>,
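This blanket impl is the self-attention case: the bounds show the single input being split from its gradient tape and reused as queries, keys, and values. A sketch (leaky_trace attaches a tape; the method name assumes dfdx 0.13):

    // Self attention with a gradient tape carried on the query path.
    let x: Tensor<Rank3<4, 10, 8>, f32, _> = dev.sample_normal();
    let y = mha.forward(x.leaky_trace()); // same shape as x: (4, 10, 8)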
impl<const M: usize, const H: usize, const K: usize, const V: usize, E, D: Device<E>> TensorCollection<E, D> for MultiHeadAttention<M, H, K, V, E, D>
where
    E: Dtype + Float + SampleUniform,
type To<E2: Dtype, D2: Device<E2>> = MultiHeadAttention<M, H, K, V, E2, D2>
Type alias that specifies how a module's type changes when using a different dtype and/or device.
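For instance, the alias can be read off at the type level (a compile-time illustration; module paths follow dfdx 0.13):

    use dfdx::nn::modules::MultiHeadAttention;
    use dfdx::nn::tensor_collection::TensorCollection;
    use dfdx::tensor::Cpu;

    type Mha32 = MultiHeadAttention<8, 2, 8, 8, f32, Cpu>;
    // The same architecture re-parameterized over f64:
    type Mha64 = <Mha32 as TensorCollection<f32, Cpu>>::To<f64, Cpu>;
    // i.e. MultiHeadAttention<8, 2, 8, 8, f64, Cpu>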
fn iter_tensors<Vi: ModuleVisitor<Self, E, D>>(
    visitor: &mut Vi
) -> Result<Option<Self::To<Vi::E2, Vi::D2>>, Vi::Err>
Specifies how to iterate through tensors or modules contained within this module, and how to construct this module given values for its fields. Returns Err(_) to indicate an error, Ok(None) to indicate that there is no error but a module has not been built, and Ok(Some(_)) containing Self::To<Vi::E2, Vi::D2>.
fn module<F1, F2, Field>(
    name: &str,
    get_ref: F1,
    get_mut: F2
) -> ModuleField<'_, F1, F2, Self, Field>
where
    F1: FnMut(&Self) -> &Field,
    F2: FnMut(&mut Self) -> &mut Field,
    Field: TensorCollection<E, D>,
Creates a ModuleFields that represents a field that may contain one or more tensors.
fn tensor<F1, F2, S>(
    name: &str,
    get_ref: F1,
    get_mut: F2,
    options: TensorOptions<S, E, D>
) -> TensorField<'_, F1, F2, Self, S, E, D>
where
    F1: FnMut(&Self) -> &Tensor<S, E, D>,
    F2: FnMut(&mut Self) -> &mut Tensor<S, E, D>,
    S: Shape,
Creates a ModuleFields that represents a tensor field.