pub struct MultiHeadAttention<F: Float + Debug> { /* private fields */ }
Multi-head attention layer as used in transformer architectures
This layer implements the attention operation described in “Attention Is All You Need” (Vaswani et al., 2017). It projects the queries, keys, and values into multiple heads, computes scaled dot-product attention in each head, concatenates the per-head results, and projects them back to the original dimension.
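The scaled dot-product step at the heart of each head can be sketched in plain Rust. This is an illustration only, using std-only `Vec<f64>` in place of this crate's array types; the function names here are illustrative and not part of the crate's API:

```rust
// Illustration only: scaled dot-product attention for a single head,
// computing softmax(q · kᵀ / sqrt(d_k)) · V for one query vector.

fn softmax(scores: &[f64]) -> Vec<f64> {
    // Subtract the max for numerical stability before exponentiating.
    let max = scores.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
    let exps: Vec<f64> = scores.iter().map(|s| (s - max).exp()).collect();
    let sum: f64 = exps.iter().sum();
    exps.iter().map(|e| e / sum).collect()
}

fn dot(a: &[f64], b: &[f64]) -> f64 {
    a.iter().zip(b).map(|(x, y)| x * y).sum()
}

/// Attention output for one query against a sequence of keys/values.
fn attention(query: &[f64], keys: &[Vec<f64>], values: &[Vec<f64>]) -> Vec<f64> {
    let d_k = query.len() as f64;
    // Scaled dot-product score against every key.
    let scores: Vec<f64> = keys.iter().map(|k| dot(query, k) / d_k.sqrt()).collect();
    let weights = softmax(&scores);
    // Weighted sum of the value vectors.
    let mut out = vec![0.0; values[0].len()];
    for (w, v) in weights.iter().zip(values) {
        for (o, x) in out.iter_mut().zip(v) {
            *o += w * x;
        }
    }
    out
}

fn main() {
    let keys = vec![vec![1.0, 0.0], vec![0.0, 1.0]];
    let values = vec![vec![10.0, 0.0], vec![0.0, 10.0]];
    // A query aligned with the first key attends mostly to the first value.
    let out = attention(&[1.0, 0.0], &keys, &values);
    assert!(out[0] > out[1]);
}
```

The multi-head layer runs this computation independently per head on head-sized projections of the input, then concatenates and re-projects the results.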
Examples
use scirs2_neural::layers::{MultiHeadAttention, Layer};
use scirs2_neural::layers::AttentionConfig;
use ndarray::Array3;
use rand::rngs::SmallRng;
use rand::SeedableRng;
// Create multi-head attention with 2 heads and 64-dim embeddings
let mut rng = SmallRng::seed_from_u64(42);
let config = AttentionConfig {
    num_heads: 2,
    head_dim: 32,
    dropout_prob: 0.0,
    causal: false,
    scale: None,
};
let mha = MultiHeadAttention::new(64, config, &mut rng).unwrap();
// Forward pass with a batch of 2 samples, sequence length 3
let batch_size = 2;
let seq_len = 3;
let d_model = 64;
let input = Array3::<f64>::from_elem((batch_size, seq_len, d_model), 0.1).into_dyn();
let output = mha.forward(&input).unwrap();
// Output shape should match input shape
assert_eq!(output.shape(), input.shape());
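The `causal` flag in `AttentionConfig` requests autoregressive masking: each position may attend only to itself and earlier positions. A hedged, std-only sketch of the usual way this is implemented (masking future scores to negative infinity before the softmax; this is the standard technique, not a view into the crate's internals):

```rust
// Illustration only: what `causal: true` implies for the attention weights.

fn softmax_row(row: &[f64]) -> Vec<f64> {
    let max = row.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
    let exps: Vec<f64> = row.iter().map(|s| (s - max).exp()).collect();
    let sum: f64 = exps.iter().sum();
    exps.iter().map(|e| e / sum).collect()
}

/// Mask the strict upper triangle of a seq_len x seq_len score matrix,
/// then softmax each row.
fn causal_attention_weights(mut scores: Vec<Vec<f64>>) -> Vec<Vec<f64>> {
    let n = scores.len();
    for i in 0..n {
        for j in (i + 1)..n {
            scores[i][j] = f64::NEG_INFINITY; // future positions get zero weight
        }
    }
    scores.iter().map(|row| softmax_row(row)).collect()
}

fn main() {
    // Uniform scores: each position splits attention over itself and the past.
    let weights = causal_attention_weights(vec![vec![0.0; 3]; 3]);
    assert_eq!(weights[0][1], 0.0); // position 0 cannot see position 1
    assert_eq!(weights[0][2], 0.0);
    assert!((weights[2].iter().sum::<f64>() - 1.0).abs() < 1e-12);
}
```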
Implementations
impl<F: Float + Debug + ScalarOperand + 'static> MultiHeadAttention<F>
Trait Implementations
impl<F: Float + Debug + ScalarOperand + 'static> Clone for MultiHeadAttention<F>
impl<F: Float + Debug + ScalarOperand + 'static> Layer<F> for MultiHeadAttention<F>
fn as_any_mut(&mut self) -> &mut dyn Any
Get the layer as a mutable dyn Any for downcasting.
fn forward(&self, input: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>>
Forward pass of the layer.
fn backward(
    &self,
    input: &Array<F, IxDyn>,
    _grad_output: &Array<F, IxDyn>,
) -> Result<Array<F, IxDyn>>
Backward pass of the layer to compute gradients.
fn update(&mut self, learning_rate: F) -> Result<()>
Update the layer parameters with the given gradients.
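Presumably this is a gradient-descent step of the form `param -= learning_rate * grad` applied to each parameter tensor. A minimal std-only sketch of that update rule, with flat `Vec<f64>` standing in for the crate's `Array<F, IxDyn>` parameters (a sketch of the general technique, not the crate's actual implementation):

```rust
// Illustration only: an in-place SGD step over one flattened parameter tensor.

fn sgd_step(params: &mut [f64], grads: &[f64], learning_rate: f64) {
    for (p, g) in params.iter_mut().zip(grads) {
        *p -= learning_rate * g; // move against the gradient
    }
}

fn main() {
    let mut w = vec![1.0, 2.0];
    sgd_step(&mut w, &[0.5, -1.0], 0.1);
    assert!((w[0] - 0.95).abs() < 1e-9);
    assert!((w[1] - 2.1).abs() < 1e-9);
}
```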
fn gradients(&self) -> Vec<Array<F, IxDyn>>
Get the gradients of the layer parameters.
fn set_gradients(&mut self, _gradients: &[Array<F, IxDyn>]) -> Result<()>
Set the gradients of the layer parameters.
fn set_params(&mut self, _params: &[Array<F, IxDyn>]) -> Result<()>
Set the parameters of the layer.
fn set_training(&mut self, _training: bool)
Set the layer to training mode (true) or evaluation mode (false).
fn is_training(&self) -> bool
Get the current training mode.
fn layer_type(&self) -> &str
Get the type of the layer (e.g., “Dense”, “Conv2D”).
fn parameter_count(&self) -> usize
Get the number of trainable parameters in this layer.
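For orientation, multi-head attention with four `d_model -> d_model` linear projections (Q, K, V, and output) has a predictable count. Whether this crate's projections carry bias terms is an assumption here, so the sketch parameterizes it:

```rust
// Illustration only: parameter count under the assumption of four
// d_model x d_model projection matrices, optionally with bias vectors.
// This may not match what this crate's parameter_count() actually reports.

fn mha_param_count(d_model: usize, with_bias: bool) -> usize {
    let per_projection = d_model * d_model + if with_bias { d_model } else { 0 };
    4 * per_projection
}

fn main() {
    // For the 64-dim example above:
    assert_eq!(mha_param_count(64, false), 16_384); // 4 * 64 * 64
    assert_eq!(mha_param_count(64, true), 16_640);  // plus 4 * 64 bias terms
}
```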
fn layer_description(&self) -> String
Get a detailed description of this layer.
impl<F: Float + Debug + ScalarOperand + 'static> ParamLayer<F> for MultiHeadAttention<F>
Auto Trait Implementations
impl<F> !Freeze for MultiHeadAttention<F>
impl<F> !RefUnwindSafe for MultiHeadAttention<F>
impl<F> Send for MultiHeadAttention<F>
where
    F: Send,
impl<F> !Sync for MultiHeadAttention<F>
impl<F> Unpin for MultiHeadAttention<F>
where
    F: Unpin,
impl<F> UnwindSafe for MultiHeadAttention<F>
where
    F: UnwindSafe + RefUnwindSafe,
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.
impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true, otherwise into a Right variant.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true, otherwise into a Right variant.