Struct dfdx::nn::modules::MultiHeadAttention
pub struct MultiHeadAttention<const EMBED_DIM: usize, const NUM_HEADS: usize, const K_DIM: usize, const V_DIM: usize, E: Dtype, D: DeviceStorage> {
pub w_q: Linear<EMBED_DIM, K_DIM, E, D>,
pub w_k: Linear<EMBED_DIM, K_DIM, E, D>,
pub w_v: Linear<EMBED_DIM, V_DIM, E, D>,
pub w_o: Linear<V_DIM, EMBED_DIM, E, D>,
}
A multi-head attention layer.
Generics:
- EMBED_DIM: The size of query vectors.
- NUM_HEADS: The number of heads to split query/key/value into.
- K_DIM (optional): The size of key vectors. Defaults to EMBED_DIM.
- V_DIM (optional): The size of value vectors. Defaults to EMBED_DIM.
PyTorch equivalent: torch.nn.MultiheadAttention(EMBED_DIM, NUM_HEADS, batch_first=True)
Examples
- MultiHeadAttention<8, 2> is an attention layer with 2 heads and 8-dimensional token, key, and value vectors.
- MultiHeadAttention<8, 2, 6, 4> is an attention layer whose key and value dimensions differ from the embed dimension.
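A minimal usage sketch (it assumes the matching builder type exported by the dfdx 0.13 prelude and a CPU device; exact paths may differ across versions):

    use dfdx::prelude::*;

    fn main() {
        let dev = Cpu::default();
        // The builder MultiHeadAttention<8, 2> defaults K_DIM = V_DIM = EMBED_DIM = 8,
        // so this constructs a modules::MultiHeadAttention<8, 2, 8, 8, f32, Cpu>.
        let mha = dev.build_module::<MultiHeadAttention<8, 2>, f32>();

        // Batched self attention, batch-first like the PyTorch equivalent:
        // (batch = 4, seq = 10, embed = 8) in, same shape out.
        let x: Tensor<Rank3<4, 10, 8>, f32, _> = dev.sample_normal();
        let _y = mha.forward(x); // shape: (4, 10, 8)
    }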
Fields
w_q: Linear<EMBED_DIM, K_DIM, E, D>
w_k: Linear<EMBED_DIM, K_DIM, E, D>
w_v: Linear<EMBED_DIM, V_DIM, E, D>
w_o: Linear<V_DIM, EMBED_DIM, E, D>
Trait Implementations
impl<const EMBED_DIM: usize, const NUM_HEADS: usize, const K_DIM: usize, const V_DIM: usize, E: Clone + Dtype, D: Clone + DeviceStorage> Clone for MultiHeadAttention<EMBED_DIM, NUM_HEADS, K_DIM, V_DIM, E, D>
fn clone(&self) -> MultiHeadAttention<EMBED_DIM, NUM_HEADS, K_DIM, V_DIM, E, D>
Returns a copy of the value.
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source.
impl<const EMBED_DIM: usize, const NUM_HEADS: usize, const K_DIM: usize, const V_DIM: usize, E: Debug + Dtype, D: Debug + DeviceStorage> Debug for MultiHeadAttention<EMBED_DIM, NUM_HEADS, K_DIM, V_DIM, E, D>
impl<const M: usize, const H: usize, const K: usize, const V: usize, E, D, B, S1, S2, T> Module<(Tensor<(B, S1, Const<M>), E, D, T>, Tensor<(B, S2, Const<M>), E, D, NoneTape>, Tensor<(B, S2, Const<M>), E, D, NoneTape>)> for MultiHeadAttention<M, H, K, V, E, D>
where
    E: Dtype + Float,
    D: Device<E>,
    B: Dim,
    S1: Dim,
    S2: Dim,
    T: Tape<E, D>,
fn try_forward(
    &self,
    (q, k, v): (Tensor<(B, S1, Const<M>), E, D, T>, Tensor<(B, S2, Const<M>), E, D>, Tensor<(B, S2, Const<M>), E, D>)
) -> Result<Self::Output, D::Err>
Batched encoder-decoder style attention, where one set of tensors supplies the keys and values and another supplies the queries.
type Error = <D as HasErr>::Err
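Continuing the construction sketch above (shapes are illustrative):

    // q: (batch = 4, tgt = 10, embed = 8); k, v: (batch = 4, src = 6, embed = 8).
    let q: Tensor<Rank3<4, 10, 8>, f32, _> = dev.sample_normal();
    let k: Tensor<Rank3<4, 6, 8>, f32, _> = dev.sample_normal();
    let v: Tensor<Rank3<4, 6, 8>, f32, _> = dev.sample_normal();
    // The output sequence length follows the queries: (4, 10, 8).
    let out = mha.forward((q, k, v));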
impl<const M: usize, const H: usize, const K: usize, const V: usize, E, D, S1, S2, T> Module<(Tensor<(S1, Const<M>), E, D, T>, Tensor<(S2, Const<M>), E, D, NoneTape>, Tensor<(S2, Const<M>), E, D, NoneTape>)> for MultiHeadAttention<M, H, K, V, E, D>
where
    E: Dtype + Float,
    D: Device<E>,
    S1: Dim,
    S2: Dim,
    T: Tape<E, D>,
fn try_forward(
    &self,
    (q, k, v): (Tensor<(S1, Const<M>), E, D, T>, Tensor<(S2, Const<M>), E, D>, Tensor<(S2, Const<M>), E, D>)
) -> Result<Self::Output, D::Err>
Unbatched encoder-decoder style attention, where one set of tensors supplies the keys and values and another supplies the queries.
type Error = <D as HasErr>::Err
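The unbatched form works the same way minus the leading batch dimension; try_forward surfaces device errors instead of panicking. A sketch reusing mha and dev from above:

    // q: (tgt = 10, embed = 8); the same encoded sequence serves as both k and v.
    let q: Tensor<Rank2<10, 8>, f32, _> = dev.sample_normal();
    let kv: Tensor<Rank2<6, 8>, f32, _> = dev.sample_normal();
    let out = mha.try_forward((q, kv.clone(), kv)).expect("attention failed"); // (10, 8)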
impl<const M: usize, const H: usize, const K: usize, const V: usize, E, D, Src> Module<Src> for MultiHeadAttention<M, H, K, V, E, D>
where
    E: Dtype,
    D: Device<E>,
    Src: SplitTape,
    Self: Module<(Src, Src::NoTape, Src::NoTape), Output = Src, Error = D::Err>,
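This blanket impl is the self-attention case: the bounds show the single input being split from its gradient tape and reused as queries, keys, and values. A sketch (leaky_trace attaches a tape; the method name assumes dfdx 0.13):

    // Self attention with a gradient tape carried on the query path.
    let x: Tensor<Rank3<4, 10, 8>, f32, _> = dev.sample_normal();
    let y = mha.forward(x.leaky_trace()); // same shape as x: (4, 10, 8)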
impl<const M: usize, const H: usize, const K: usize, const V: usize, E, D: Device<E>> TensorCollection<E, D> for MultiHeadAttention<M, H, K, V, E, D>
where
    E: Dtype + Float + SampleUniform,
type To<E2: Dtype, D2: Device<E2>> = MultiHeadAttention<M, H, K, V, E2, D2>
Type alias that specifies how a module's type changes when using a different dtype and/or device.
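For instance, the alias can be read off at the type level (a compile-time illustration; module paths follow dfdx 0.13):

    use dfdx::nn::modules::MultiHeadAttention;
    use dfdx::nn::tensor_collection::TensorCollection;
    use dfdx::tensor::Cpu;

    type Mha32 = MultiHeadAttention<8, 2, 8, 8, f32, Cpu>;
    // The same architecture re-parameterized over f64:
    type Mha64 = <Mha32 as TensorCollection<f32, Cpu>>::To<f64, Cpu>;
    // i.e. MultiHeadAttention<8, 2, 8, 8, f64, Cpu>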
fn iter_tensors<Vi: ModuleVisitor<Self, E, D>>(
    visitor: &mut Vi
) -> Result<Option<Self::To<Vi::E2, Vi::D2>>, Vi::Err>
Specifies how to iterate through tensors or modules contained within this module, and how to construct this module given values for its fields. Returns Err(_) to indicate an error, Ok(None) to indicate that there is no error but a module has not been built, and Ok(Some(_)) containing Self::To<Vi::E2, Vi::D2>.
fn module<F1, F2, Field>(
    name: &str,
    get_ref: F1,
    get_mut: F2
) -> ModuleField<'_, F1, F2, Self, Field>
where
    F1: FnMut(&Self) -> &Field,
    F2: FnMut(&mut Self) -> &mut Field,
    Field: TensorCollection<E, D>,
Creates a ModuleFields that represents a field that may contain one or more tensors.
fn tensor<F1, F2, S>(
    name: &str,
    get_ref: F1,
    get_mut: F2,
    options: TensorOptions<S, E, D>
) -> TensorField<'_, F1, F2, Self, S, E, D>
where
    F1: FnMut(&Self) -> &Tensor<S, E, D>,
    F2: FnMut(&mut Self) -> &mut Tensor<S, E, D>,
    S: Shape,
Creates a ModuleFields that represents a tensor field.