Struct dfdx::nn::TransformerDecoderBlock
pub struct TransformerDecoderBlock<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize> {
pub self_attn: MultiHeadAttention<MODEL_DIM, NUM_HEADS>,
pub norm1: LayerNorm1D<MODEL_DIM>,
pub mh_attn: MultiHeadAttention<MODEL_DIM, NUM_HEADS>,
pub norm2: LayerNorm1D<MODEL_DIM>,
pub ff: Residual<(Linear<MODEL_DIM, FF_DIM>, ReLU, Linear<FF_DIM, MODEL_DIM>)>,
pub norm3: LayerNorm1D<MODEL_DIM>,
}
Requires Nightly.

A transformer decoder block. Differs from the standard transformer block in that it contains a second multi-head attention (mh_attn) that attends over an additional sequence produced by the encoder.
Generics
- MODEL_DIM: The size of query/key/value tensors. Given to MultiHeadAttention.
- NUM_HEADS: The number of heads in MultiHeadAttention.
- FF_DIM: The size of the hidden layer in the feedforward network.
Pytorch equivalent:

    decoder = torch.nn.TransformerDecoderLayer(
        EMBED_DIM, NUM_HEADS, dim_feedforward=FF_DIM, batch_first=True, dropout=0.0
    )
TODO: Doctests
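Until doctests land, here is a self-contained sketch of the data flow the fields above imply (post-norm, matching torch.nn.TransformerDecoderLayer with its default norm_first=False). The attention and feed-forward layers are stubbed with placeholder functions; only the residual/LayerNorm wiring reflects the block's structure, and none of these names are dfdx API.

```rust
// Hypothetical sketch of TransformerDecoderBlock's forward pass over a single
// feature vector. attn_stub/ff_stub are placeholders, NOT dfdx's layers.

fn layer_norm(x: &[f32]) -> Vec<f32> {
    // Normalize to zero mean / unit variance, as LayerNorm1D does over the
    // feature dimension.
    let n = x.len() as f32;
    let mean = x.iter().sum::<f32>() / n;
    let var = x.iter().map(|v| (v - mean).powi(2)).sum::<f32>() / n;
    x.iter().map(|v| (v - mean) / (var + 1e-5).sqrt()).collect()
}

fn add(a: &[f32], b: &[f32]) -> Vec<f32> {
    a.iter().zip(b).map(|(x, y)| x + y).collect()
}

// Placeholder standing in for MultiHeadAttention::forward((q, k, v)).
fn attn_stub(q: &[f32], _k: &[f32], _v: &[f32]) -> Vec<f32> {
    q.to_vec()
}

// Placeholder standing in for the (Linear, ReLU, Linear) feed-forward network.
fn ff_stub(x: &[f32]) -> Vec<f32> {
    x.iter().map(|v| v.max(0.0)).collect()
}

fn decoder_block_forward(tgt: &[f32], mem: &[f32]) -> Vec<f32> {
    // 1. self_attn over tgt, residual connection, then norm1.
    let x = layer_norm(&add(tgt, &attn_stub(tgt, tgt, tgt)));
    // 2. mh_attn (cross-attention): queries from x, keys/values from the
    //    encoder output `mem`; residual, then norm2.
    let x = layer_norm(&add(&x, &attn_stub(&x, mem, mem)));
    // 3. `ff` is declared as a Residual in the struct, so add, then norm3.
    layer_norm(&add(&x, &ff_stub(&x)))
}
```

The output has the same length as the target input, and each LayerNorm leaves its output with (near-)zero mean.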
Fields
self_attn: MultiHeadAttention<MODEL_DIM, NUM_HEADS>
norm1: LayerNorm1D<MODEL_DIM>
mh_attn: MultiHeadAttention<MODEL_DIM, NUM_HEADS>
norm2: LayerNorm1D<MODEL_DIM>
ff: Residual<(Linear<MODEL_DIM, FF_DIM>, ReLU, Linear<FF_DIM, MODEL_DIM>)>
norm3: LayerNorm1D<MODEL_DIM>
Trait Implementations
impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize> CanUpdateWithGradients for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM>

fn update<G: GradientProvider>(
    &mut self,
    grads: &mut G,
    unused: &mut UnusedTensors
)
Updates self given the GradientProvider. When any parameters are NOT present in G, this function should add the tensor's UniqueId to UnusedTensors. Read more
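To make the update contract concrete, here is a minimal standalone sketch. The types below are illustrative stand-ins, not dfdx's actual Gradients/UnusedTensors: a parameter whose gradient is missing from the provider records its id instead of being silently skipped.

```rust
use std::collections::HashMap;

type UniqueId = u64;

// Stand-in for a GradientProvider: maps parameter ids to gradients.
struct Gradients(HashMap<UniqueId, Vec<f32>>);

// Stand-in for UnusedTensors: collects ids with no gradient present.
#[derive(Default)]
struct UnusedTensors {
    ids: Vec<UniqueId>,
}

struct Param {
    id: UniqueId,
    data: Vec<f32>,
}

impl Param {
    fn update(&mut self, grads: &Gradients, unused: &mut UnusedTensors) {
        match grads.0.get(&self.id) {
            Some(g) => {
                // Gradient present: apply a simple SGD step.
                for (p, g) in self.data.iter_mut().zip(g) {
                    *p -= 0.1 * g;
                }
            }
            // Gradient missing: record the id rather than ignore it.
            None => unused.ids.push(self.id),
        }
    }
}
```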
impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize> Clone for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM>
fn clone(&self) -> TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM>
Returns a copy of the value. Read more
fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize> Debug for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM>
impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize> Default for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM>

fn default() -> TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM>
Returns the “default value” for a type. Read more
impl<const M: usize, const H: usize, const F: usize> LoadFromNpz for TransformerDecoderBlock<M, H, F>
impl<const M: usize, const H: usize, const F: usize, Tgt, Mem> Module<(Tgt, Mem)> for TransformerDecoderBlock<M, H, F>
where
    Tgt: Tensor<Dtype = f32>,
    Mem: Tensor<Dtype = f32, NoTape = Mem> + Clone,
    MultiHeadAttention<M, H>: Module<(Tgt, Tgt::NoTape, Tgt::NoTape), Output = Tgt> + Module<(Tgt, Mem, Mem), Output = Tgt>,
    LayerNorm1D<M>: Module<Tgt, Output = Tgt>,
    Residual<(Linear<M, F>, ReLU, Linear<F, M>)>: Module<Tgt, Output = Tgt>,
type Output = Tgt

The type that this unit produces given Input.

fn forward(&self, (tgt, mem): (Tgt, Mem)) -> Self::Output
impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize> ResetParams for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM>
Auto Trait Implementations
impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize> RefUnwindSafe for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM>
impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize> Send for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM>
impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize> Sync for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM>
impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize> Unpin for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM>
impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize> UnwindSafe for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM>
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more