//! Single-layer MLP projector — mirrors `MLPProjector` from
//! `src/opentslm/model/projector/MLPProjector.py`.
//!
//! The projector is used by the Flamingo-style variant (`OpenTSLMFlamingo`)
//! to bridge the encoder output dimension (128) to the LLM hidden size.
//! The SP (soft-prompt) variant used in this Rust port projects directly to
//! vocabulary logits via a `LogitBiasHead` linear layer in
//! [`opentslm_sp`](super::llm::opentslm_sp), so this module is retained for
//! completeness but is not wired into the default training pipeline.
//!
//! # Forward pass
//!
//! ```text
//! x  [B, N, input_dim]
//!  → LayerNorm
//!  → Linear(input_dim → output_dim)
//!  → GELU
//!  → Dropout
//!  → [B, N, output_dim]
//! ```
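//!
//! # Example
//!
//! A minimal usage sketch. The backend is an assumption — `NdArray` from
//! `burn-ndarray` is used here purely for illustration, and `2048` stands in
//! for whatever hidden size the target LLM actually has:
//!
//! ```ignore
//! use burn::backend::NdArray;
//! use burn::tensor::Tensor;
//!
//! let device = Default::default();
//! // Bridge the 128-dim encoder output to a hypothetical 2048-dim LLM.
//! let projector = MlpProjectorConfig::new(128, 2048).init::<NdArray>(&device);
//!
//! let x = Tensor::<NdArray, 3>::zeros([2, 16, 128], &device); // [B, N, input_dim]
//! let y = projector.forward(x);                               // [B, N, output_dim]
//! assert_eq!(y.dims(), [2, 16, 2048]);
//! ```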

use burn::{
    module::Module,
    nn::{Dropout, DropoutConfig, LayerNorm, LayerNormConfig, Linear, LinearConfig},
    prelude::Backend,
    tensor::{activation, Tensor},
};

/// Configuration for [`MlpProjector`].
#[derive(Debug, Clone)]
pub struct MlpProjectorConfig {
    /// Dimensionality of the input tensor (encoder output dim).
    pub input_dim: usize,
    /// Dimensionality of the output tensor (LLM hidden size).
    pub output_dim: usize,
    /// Dropout probability applied after the GELU activation.
    pub dropout: f64,
}

impl MlpProjectorConfig {
    /// Create a config with no dropout (the default used in all OpenTSLM
    /// experiments).
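    ///
    /// A non-zero dropout can be set with struct-update syntax; this is a
    /// sketch only (the `0.1` probability and `2048` output dim are
    /// illustrative, not values used by the reference experiments):
    ///
    /// ```ignore
    /// let config = MlpProjectorConfig {
    ///     dropout: 0.1,
    ///     ..MlpProjectorConfig::new(128, 2048)
    /// };
    /// ```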
    pub fn new(input_dim: usize, output_dim: usize) -> Self {
        Self {
            input_dim,
            output_dim,
            dropout: 0.0,
        }
    }

    /// Instantiate an [`MlpProjector`] on `device`.
    pub fn init<B: Backend>(&self, device: &B::Device) -> MlpProjector<B> {
        MlpProjector {
            norm: LayerNormConfig::new(self.input_dim).init::<B>(device),
            linear: LinearConfig::new(self.input_dim, self.output_dim).init::<B>(device),
            dropout: DropoutConfig::new(self.dropout).init(),
        }
    }
}

/// Single-layer MLP projector: LayerNorm → Linear → GELU → Dropout.
///
/// Mirrors `MLPProjector` in
/// `src/opentslm/model/projector/MLPProjector.py`.
#[derive(Module, Debug)]
pub struct MlpProjector<B: Backend> {
    norm: LayerNorm<B>,
    linear: Linear<B>,
    dropout: Dropout,
}

impl<B: Backend> MlpProjector<B> {
    /// `x`: `[B, N, input_dim]` → `[B, N, output_dim]`
    pub fn forward(&self, x: Tensor<B, 3>) -> Tensor<B, 3> {
        let x = self.norm.forward(x);
        let x = self.linear.forward(x);
        let x = activation::gelu(x);
        self.dropout.forward(x)
    }
}
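
// A minimal shape test, provided as a sketch. It assumes the `NdArray`
// backend (`burn-ndarray`) is available as a dev-dependency; substitute the
// backend this crate actually tests against (e.g. WGPU) if it differs. The
// `2048` output dim is illustrative, standing in for the LLM hidden size.
#[cfg(test)]
mod tests {
    use super::*;
    use burn::backend::NdArray;

    #[test]
    fn forward_projects_to_output_dim() {
        let device = Default::default();
        let projector = MlpProjectorConfig::new(128, 2048).init::<NdArray>(&device);

        // [B, N, input_dim] → [B, N, output_dim]
        let x = Tensor::<NdArray, 3>::zeros([2, 16, 128], &device);
        let y = projector.forward(x);
        assert_eq!(y.dims(), [2, 16, 2048]);
    }
}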