Struct dfdx::nn::modules::TransformerDecoderBlock

source ·

pub struct TransformerDecoderBlock<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E: Dtype, D: Storage<E>> {
    pub self_attn: MultiHeadAttention<MODEL_DIM, NUM_HEADS, MODEL_DIM, MODEL_DIM, E, D>,
    pub norm1: LayerNorm1D<MODEL_DIM, E, D>,
    pub mh_attn: MultiHeadAttention<MODEL_DIM, NUM_HEADS, MODEL_DIM, MODEL_DIM, E, D>,
    pub norm2: LayerNorm1D<MODEL_DIM, E, D>,
    pub ff: Residual<(Linear<M, F, E, D>, ReLU, Linear<F, M, E, D>)>,
    pub norm3: LayerNorm1D<MODEL_DIM, E, D>,
}

Expand description

A transformer decoder block. Different than the normal transformer block as this self attention accepts an additional sequence from the encoder.

Generics

MODEL_DIM: The size of query/key/value tensors. Given to MultiHeadAttention.
NUM_HEADS: The number of heads in MultiHeadAttention.
FF_DIM: The size of the hidden layer in the feedforward network.

Pytorch equivalent:

decoder = torch.nn.TransformerDecoderLayer(
   EMBED_DIM, NUM_HEADS, dim_feedforward=FF_DIM, batch_first=True, dropout=0.0
)

Fields§

§self_attn: MultiHeadAttention<MODEL_DIM, NUM_HEADS, MODEL_DIM, MODEL_DIM, E, D>§norm1: LayerNorm1D<MODEL_DIM, E, D>§mh_attn: MultiHeadAttention<MODEL_DIM, NUM_HEADS, MODEL_DIM, MODEL_DIM, E, D>§norm2: LayerNorm1D<MODEL_DIM, E, D>§ff: Residual<(Linear<M, F, E, D>, ReLU, Linear<F, M, E, D>)>§norm3: LayerNorm1D<MODEL_DIM, E, D>

Struct dfdx::nn::modules::TransformerDecoderBlock

Fields§

Trait Implementations§

impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E: Clone + Dtype, D: Clone + Storage<E>> Clone for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>

fn clone(&self) -> TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>

fn clone_from(&mut self, source: &Self)

impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E: Debug + Dtype, D: Debug + Storage<E>> Debug for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>

fn fmt(&self, f: &mut Formatter<'_>) -> Result

type Output = Tgt

type Error = <D as HasErr>::Err

fn try_forward(&self, (tgt, mem): (Tgt, Mem)) -> Result<Self::Output, D::Err>

fn forward(&self, input: Input) -> Self::Output

impl<const M: usize, const N: usize, const F: usize, E, D: Device<E>> TensorCollection<E, D> for TransformerDecoderBlock<M, N, F, E, D>where E: Dtype + Float + SampleUniform,

type To<E2: Dtype, D2: Device<E2>> = TransformerDecoderBlock<M, N, F, E2, D2>

fn iter_tensors<V: ModuleVisitor<Self, E, D>>( visitor: &mut V ) -> Result<Option<Self::To<V::E2, V::D2>>, V::Err>

fn module<F1, F2, Field>( name: &str, get_ref: F1, get_mut: F2 ) -> ModuleField<'_, F1, F2, Self, Field>where F1: FnMut(&Self) -> &Field, F2: FnMut(&mut Self) -> &mut Field, Field: TensorCollection<E, D>,

fn tensor<F1, F2, S>( name: &str, get_ref: F1, get_mut: F2, options: TensorOptions<S, E, D> ) -> TensorField<'_, F1, F2, Self, S, E, D>where F1: FnMut(&Self) -> &Tensor<S, E, D>, F2: FnMut(&mut Self) -> &mut Tensor<S, E, D>, S: Shape,

fn scalar<F1, F2, N>( name: &str, get_ref: F1, get_mut: F2, options: ScalarOptions<N> ) -> ScalarField<'_, F1, F2, Self, N>where F1: FnMut(&Self) -> &N, F2: FnMut(&mut Self) -> &mut N, N: NumCast,

impl<const M: usize, const H: usize, const F: usize, E: Dtype, D: Device<E>> NonMutableModule for TransformerDecoderBlock<M, H, F, E, D>

Auto Trait Implementations§

impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E, D> RefUnwindSafe for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>where D: RefUnwindSafe, <D as Storage<E>>::Vec: RefUnwindSafe,

impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E, D> Send for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>where D: Send,

impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E, D> Sync for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>where D: Sync,

impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E, D> Unpin for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>where D: Unpin,

impl<const MODEL_DIM: usize, const NUM_HEADS: usize, const FF_DIM: usize, E, D> UnwindSafe for TransformerDecoderBlock<MODEL_DIM, NUM_HEADS, FF_DIM, E, D>where D: UnwindSafe, <D as Storage<E>>::Vec: RefUnwindSafe,

Blanket Implementations§

impl<T> Any for Twhere T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for Twhere T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for Twhere T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<D, E, M> BuildModule<D, E> for Mwhere D: Device<E>, E: Dtype, M: TensorCollection<E, D, To<E, D> = M>,

fn build(device: &D) -> Self

fn try_build(device: &D) -> Result<Self, D::Err>

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for Twhere U: From<T>,

fn into(self) -> U

impl<E, D, T> LoadFromNpz<E, D> for Twhere E: Dtype + NumpyDtype, D: Device<E>, T: TensorCollection<E, D>,

fn load<P: AsRef<Path>>(&mut self, path: P) -> Result<(), NpzError>

fn read<R>(&mut self, r: &mut ZipArchive<R>) -> Result<(), NpzError>where R: Read + Seek,

impl<E, D, T> LoadFromSafetensors<E, D> for Twhere E: Dtype + SafeDtype, D: Device<E>, T: TensorCollection<E, D>,

fn load_safetensors<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error>

impl<E, D, M> ModelEMA<E, D> for Mwhere E: Dtype, D: Device<E>, M: TensorCollection<E, D>,

fn ema(&mut self, other: &Self, decay: impl Into<f64>)

fn try_ema(&mut self, other: &Self, decay: impl Into<f64>) -> Result<(), D::Err>

impl<M, T> ModuleMut<T> for Mwhere M: NonMutableModule + Module<T>,

type Output = <M as Module<T>>::Output

type Error = <M as Module<T>>::Error

fn try_forward_mut( &mut self, input: T ) -> Result<<M as ModuleMut<T>>::Output, <M as ModuleMut<T>>::Error>

fn forward_mut(&mut self, input: Input) -> Self::Output

impl<E, D, M> NumParams<E, D> for Mwhere E: Dtype, D: Device<E>, M: TensorCollection<E, D>,

fn num_trainable_params(&self) -> usize

impl<T> Pointable for T

const ALIGN: usize = mem::align_of::<T>()

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<E, D, M> ResetParams<E, D> for Mwhere E: Dtype, D: Device<E>, M: TensorCollection<E, D>,

fn reset_params(&mut self)

fn try_reset_params(&mut self) -> Result<(), D::Err>

impl<E, D, T> SaveToNpz<E, D> for Twhere E: Dtype + NumpyDtype, D: Device<E>, T: TensorCollection<E, D>,

fn save<P: AsRef<Path>>(&self, path: P) -> ZipResult<()>

fn write<W>(&self, w: &mut ZipWriter<W>) -> ZipResult<()>where W: Write + Seek,

impl<E, D, T> SaveToSafetensors<E, D> for Twhere E: Dtype + SafeDtype, D: Device<E>, T: TensorCollection<E, D>,

fn save_safetensors<P: AsRef<Path>>( &self, path: P ) -> Result<(), SafeTensorError>

impl<E, D1, D2, T> ToDevice<E, D1, D2> for Twhere E: Dtype, D1: Device<E>, D2: Device<E>, T: TensorCollection<E, D1>,

fn try_to_device(&self, device: &D2) -> Result<Self::To<E, D2>, D2::Err>

fn to_device(&self, device: &D2) -> Self::To<E, D2>

impl<E1, D, T> ToDtype<E1, D> for Twhere E1: Dtype, D: Device<E1>, T: TensorCollection<E1, D>,

fn try_to_dtype<E2: Dtype>(&self) -> Result<Self::To<E2, D>, D::Err>where D: Device<E2> + ToDtypeKernel<E1, E2>,

fn to_dtype<E2: Dtype>(&self) -> Self::To<E2, D>where D: Device<E2> + ToDtypeKernel<E1, E2>,

impl<T> ToOwned for Twhere T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for Twhere U: Into<T>,