Struct dfdx::nn::modules::TransformerDecoderBlock
source · pub struct TransformerDecoderBlock<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E: Dtype, D: DeviceStorage> {
pub self_attn: MultiHeadAttention<MODEL_DIM, NUM_HEADS, MODEL_DIM, MODEL_DIM, E, D>,
pub norm1: LayerNorm1D<MODEL_DIM, E, D>,
pub mh_attn: MultiHeadAttention<MODEL_DIM, NUM_HEADS, MODEL_DIM, MODEL_DIM, E, D>,
pub norm2: LayerNorm1D<MODEL_DIM, E, D>,
pub ff: Residual<(Linear<MODEL_DIM, FF_DIM, E, D>, ReLU, Linear<FF_DIM, MODEL_DIM, E, D>)>,
pub norm3: LayerNorm1D<MODEL_DIM, E, D>,
}
Expand description
A transformer decoder block. Differs from the standard transformer encoder block in that, alongside self attention, it contains a second multi-head attention (`mh_attn`) that accepts an additional sequence from the encoder.
Generics
MODEL_DIM: The size of query/key/value tensors. Given to MultiHeadAttention.
NUM_HEADS: The number of heads in MultiHeadAttention.
FF_DIM: The size of the hidden layer in the feedforward network.
Pytorch equivalent:
decoder = torch.nn.TransformerDecoderLayer(
MODEL_DIM, NUM_HEADS, dim_feedforward=FF_DIM, batch_first=True, dropout=0.0
)
Fields§
§self_attn: MultiHeadAttention<MODEL_DIM, NUM_HEADS, MODEL_DIM, MODEL_DIM, E, D>
§norm1: LayerNorm1D<MODEL_DIM, E, D>
§mh_attn: MultiHeadAttention<MODEL_DIM, NUM_HEADS, MODEL_DIM, MODEL_DIM, E, D>
§norm2: LayerNorm1D<MODEL_DIM, E, D>
§ff: Residual<(Linear<MODEL_DIM, FF_DIM, E, D>, ReLU, Linear<FF_DIM, MODEL_DIM, E, D>)>
§norm3: LayerNorm1D<MODEL_DIM, E, D>
Trait Implementations§
source§impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E: Clone + Dtype, D: Clone + DeviceStorage> Clone for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>
impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E: Clone + Dtype, D: Clone + DeviceStorage> Clone for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>
source§fn clone(&self) -> TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>
fn clone(&self) -> TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>
Returns a copy of the value. Read more
1.0.0 · source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from
source
. Read more
source§impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E: Debug + Dtype, D: Debug + DeviceStorage> Debug for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>
impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E: Debug + Dtype, D: Debug + DeviceStorage> Debug for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>
source§impl<const M: usize, const H: usize, const F: usize, E: Dtype, D: Device<E>, Tgt, Mem> Module<(Tgt, Mem)> for TransformerDecoderBlock<M, H, F, E, D>where
Tgt: SplitTape + TryAdd<Tgt::NoTape> + HasErr<Err = D::Err>,
Mem: Clone,
MultiHeadAttention<M, H, M, M, E, D>: Module<Tgt, Output = Tgt, Error = D::Err> + Module<(Tgt, Mem, Mem), Output = Tgt, Error = D::Err>,
LayerNorm1D<M, E, D>: Module<Tgt, Output = Tgt, Error = D::Err>,
Residual<(Linear<M, F, E, D>, ReLU, Linear<F, M, E, D>)>: Module<Tgt, Output = Tgt, Error = D::Err>,
impl<const M: usize, const H: usize, const F: usize, E: Dtype, D: Device<E>, Tgt, Mem> Module<(Tgt, Mem)> for TransformerDecoderBlock<M, H, F, E, D>where Tgt: SplitTape + TryAdd<Tgt::NoTape> + HasErr<Err = D::Err>, Mem: Clone, MultiHeadAttention<M, H, M, M, E, D>: Module<Tgt, Output = Tgt, Error = D::Err> + Module<(Tgt, Mem, Mem), Output = Tgt, Error = D::Err>, LayerNorm1D<M, E, D>: Module<Tgt, Output = Tgt, Error = D::Err>, Residual<(Linear<M, F, E, D>, ReLU, Linear<F, M, E, D>)>: Module<Tgt, Output = Tgt, Error = D::Err>,
source§impl<const M: usize, const N: usize, const F: usize, E, D: Device<E>> TensorCollection<E, D> for TransformerDecoderBlock<M, N, F, E, D>where
E: Dtype + Float + SampleUniform,
impl<const M: usize, const N: usize, const F: usize, E, D: Device<E>> TensorCollection<E, D> for TransformerDecoderBlock<M, N, F, E, D>where E: Dtype + Float + SampleUniform,
§type To<E2: Dtype, D2: Device<E2>> = TransformerDecoderBlock<M, N, F, E2, D2>
type To<E2: Dtype, D2: Device<E2>> = TransformerDecoderBlock<M, N, F, E2, D2>
Type alias that specifies how a module’s type changes when using a different dtype and/or
device.
source§fn iter_tensors<V: ModuleVisitor<Self, E, D>>(
visitor: &mut V
) -> Result<Option<Self::To<V::E2, V::D2>>, V::Err>
fn iter_tensors<V: ModuleVisitor<Self, E, D>>( visitor: &mut V ) -> Result<Option<Self::To<V::E2, V::D2>>, V::Err>
Specifies how to iterate through tensors or modules contained within this module, and how
to construct this module given values for its fields. Returns
Err(_)
to indicate an error,
Ok(None)
to indicate that there is no error and a module has not been built, and
Ok(Some(_))
contains Self::To<V::E2, V::D2>
source§fn module<F1, F2, Field>(
name: &str,
get_ref: F1,
get_mut: F2
) -> ModuleField<'_, F1, F2, Self, Field>where
F1: FnMut(&Self) -> &Field,
F2: FnMut(&mut Self) -> &mut Field,
Field: TensorCollection<E, D>,
fn module<F1, F2, Field>( name: &str, get_ref: F1, get_mut: F2 ) -> ModuleField<'_, F1, F2, Self, Field>where F1: FnMut(&Self) -> &Field, F2: FnMut(&mut Self) -> &mut Field, Field: TensorCollection<E, D>,
Creates a ModuleFields that represents a field that may contain one or more tensors. Read more
source§fn tensor<F1, F2, S>(
name: &str,
get_ref: F1,
get_mut: F2,
options: TensorOptions<S, E, D>
) -> TensorField<'_, F1, F2, Self, S, E, D>where
F1: FnMut(&Self) -> &Tensor<S, E, D>,
F2: FnMut(&mut Self) -> &mut Tensor<S, E, D>,
S: Shape,
fn tensor<F1, F2, S>( name: &str, get_ref: F1, get_mut: F2, options: TensorOptions<S, E, D> ) -> TensorField<'_, F1, F2, Self, S, E, D>where F1: FnMut(&Self) -> &Tensor<S, E, D>, F2: FnMut(&mut Self) -> &mut Tensor<S, E, D>, S: Shape,
Creates a ModuleFields that represents a tensor field. Read more