Struct dfdx::nn::MultiHeadAttention
pub struct MultiHeadAttention<const M: usize, const N: usize, const K: usize, const V: usize, const H: usize> {
pub w_q: Linear<M, K>,
pub w_k: Linear<N, K>,
pub w_v: Linear<N, V>,
pub w_o: Linear<V, M>,
}
Requires Nightly. A multi-head attention layer.
Generics
M: The embedding size of token vectors from the decoder.
N: The embedding size of token vectors from the encoder.
K: The size of the keys in self attention.
V: The size of the values.
H: The number of attention heads.
Examples
MultiHeadAttention<8, 10, 10, 10, 2>
is an attention layer with 2 heads, a decoder embedding size of 8, and encoder embedding, key, and value dims of 10.
TODO: Doctests fail for some reason
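Since the doctests are currently disabled, here is a rough usage sketch of the self-attention case (unverified; it assumes a nightly toolchain with the dfdx prelude in scope, and constructor names such as `TensorCreator::zeros` may differ between dfdx versions):

```rust
use dfdx::prelude::*;

fn main() {
    // Self attention requires M == N, so the single input tensor
    // supplies queries, keys, and values. Sizes here are hypothetical:
    // 8-dim tokens, 10-dim keys/values, 2 heads.
    let model: MultiHeadAttention<8, 8, 10, 10, 2> = Default::default();

    // A sequence of 5 tokens, each an 8-dim embedding.
    let x: Tensor2D<5, 8> = TensorCreator::zeros();

    // w_o: Linear<V, M> projects back to M, so the output
    // keeps the (sequence, M) shape.
    let _y: Tensor2D<5, 8> = model.forward(x);
}
```

The output shape follows from the field types above: `w_o: Linear<V, M>` maps the concatenated head outputs back to the decoder embedding size `M`.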
Fields
w_q: Linear<M, K>
w_k: Linear<N, K>
w_v: Linear<N, V>
w_o: Linear<V, M>
Trait Implementations
impl<const M: usize, const N: usize, const K: usize, const V: usize, const H: usize> CanUpdateWithGradients for MultiHeadAttention<M, N, K, V, H>
fn update<G: GradientProvider>(
&mut self,
grads: &mut G,
unused: &mut UnusedTensors
)
Updates self given the GradientProvider. For any parameters that are NOT present in G, this function should add the tensor’s UniqueId to UnusedTensors.
impl<const M: usize, const N: usize, const K: usize, const V: usize, const H: usize> Clone for MultiHeadAttention<M, N, K, V, H>
fn clone(&self) -> MultiHeadAttention<M, N, K, V, H>
Returns a copy of the value.
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source.
impl<const M: usize, const N: usize, const K: usize, const V: usize, const H: usize> Debug for MultiHeadAttention<M, N, K, V, H>
impl<const M: usize, const N: usize, const K: usize, const V: usize, const H: usize> Default for MultiHeadAttention<M, N, K, V, H>
fn default() -> MultiHeadAttention<M, N, K, V, H>
Returns the “default value” for a type.
impl<const M: usize, const N: usize, const K: usize, const V: usize, const S1: usize, const S2: usize, const H: usize, T: 'static + Tape> Module<(Tensor2D<S1, M, T>, Tensor2D<S2, N, NoneTape>)> for MultiHeadAttention<M, N, K, V, H> where
Assert<{ _ }>: ConstTrue,
Assert<{ _ }>: ConstTrue,
Assert<{ _ }>: ConstTrue,
Assert<{ _ }>: ConstTrue,
fn forward(
&self,
(input, from_enc): (Tensor2D<S1, M, T>, Tensor2D<S2, N>)
) -> Self::Output
Encoder-Decoder style attention, where one set of tensors supplies the keys and values and the other supplies the queries.
fn forward_mut(&mut self, input: Input) -> Self::Output
Pass an Input through the unit and produce Self::Output. Can be implemented for multiple Input types.
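The tuple input maps onto the two sides of encoder-decoder attention. A hedged sketch of calling this impl (unverified, hypothetical sequence lengths, and constructor names may differ by dfdx version):

```rust
use dfdx::prelude::*;

fn main() {
    // Decoder tokens are 8-dim (M), encoder tokens are 10-dim (N);
    // keys and values are projected to 10 dims, with 2 heads.
    let model: MultiHeadAttention<8, 10, 10, 10, 2> = Default::default();

    // 5 decoder tokens become the queries; 7 encoder tokens
    // become the keys and values.
    let q: Tensor2D<5, 8> = TensorCreator::zeros();
    let enc: Tensor2D<7, 10> = TensorCreator::zeros();

    // The output takes the decoder's sequence length (S1) and
    // embedding size (M).
    let _y: Tensor2D<5, 8> = model.forward((q, enc));
}
```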
impl<const B: usize, const M: usize, const N: usize, const K: usize, const V: usize, const S1: usize, const S2: usize, const H: usize, T: 'static + Tape> Module<(Tensor3D<B, S1, M, T>, Tensor3D<B, S2, N, NoneTape>)> for MultiHeadAttention<M, N, K, V, H> where
Assert<{ _ }>: ConstTrue,
Assert<{ _ }>: ConstTrue,
Assert<{ _ }>: ConstTrue,
Assert<{ _ }>: ConstTrue,
fn forward(
&self,
(input, from_enc): (Tensor3D<B, S1, M, T>, Tensor3D<B, S2, N>)
) -> Self::Output
Batched Encoder-Decoder style attention, where one set of tensors supplies the keys and values and the other supplies the queries.
fn forward_mut(&mut self, input: Input) -> Self::Output
Pass an Input through the unit and produce Self::Output. Can be implemented for multiple Input types.
impl<const M: usize, const K: usize, const V: usize, const S: usize, const H: usize, T: 'static + Tape> Module<Tensor2D<S, M, T>> for MultiHeadAttention<M, M, K, V, H> where
Assert<{ _ }>: ConstTrue,
Assert<{ _ }>: ConstTrue,
Assert<{ _ }>: ConstTrue,
fn forward(&self, input: Tensor2D<S, M, T>) -> Self::Output
Normal self attention, where the same tensor is used for keys, queries, and values.
fn forward_mut(&mut self, input: Input) -> Self::Output
Pass an Input through the unit and produce Self::Output. Can be implemented for multiple Input types.
impl<const B: usize, const M: usize, const K: usize, const V: usize, const S: usize, const H: usize, T: 'static + Tape> Module<Tensor3D<B, S, M, T>> for MultiHeadAttention<M, M, K, V, H> where
Assert<{ _ }>: ConstTrue,
Assert<{ _ }>: ConstTrue,
Assert<{ _ }>: ConstTrue,
fn forward(&self, input: Tensor3D<B, S, M, T>) -> Self::Output
Batched normal self attention, where the same tensor is used for keys, queries, and values.
fn forward_mut(&mut self, input: Input) -> Self::Output
Pass an Input through the unit and produce Self::Output. Can be implemented for multiple Input types.
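The batched impls only add a leading batch dimension B; all shape constraints are otherwise the same. A minimal sketch of the batched self-attention call (unverified; sizes are hypothetical and constructor names may differ by dfdx version):

```rust
use dfdx::prelude::*;

fn main() {
    // Self attention again requires M == N.
    let model: MultiHeadAttention<8, 8, 10, 10, 2> = Default::default();

    // A batch of 4 sequences, 5 tokens each, 8-dim embeddings.
    let x: Tensor3D<4, 5, 8> = TensorCreator::zeros();

    // Batch and sequence dims pass through; the last dim stays M.
    let _y: Tensor3D<4, 5, 8> = model.forward(x);
}
```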
impl<const M: usize, const N: usize, const K: usize, const V: usize, const H: usize> ResetParams for MultiHeadAttention<M, N, K, V, H>
Auto Trait Implementations
impl<const M: usize, const N: usize, const K: usize, const V: usize, const H: usize> RefUnwindSafe for MultiHeadAttention<M, N, K, V, H>
impl<const M: usize, const N: usize, const K: usize, const V: usize, const H: usize> !Send for MultiHeadAttention<M, N, K, V, H>
impl<const M: usize, const N: usize, const K: usize, const V: usize, const H: usize> !Sync for MultiHeadAttention<M, N, K, V, H>
impl<const M: usize, const N: usize, const K: usize, const V: usize, const H: usize> Unpin for MultiHeadAttention<M, N, K, V, H>
impl<const M: usize, const N: usize, const K: usize, const V: usize, const H: usize> UnwindSafe for MultiHeadAttention<M, N, K, V, H>
Blanket Implementations
impl<T> BorrowMut<T> for T where
T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.