use crate::albert::encoder::AlbertTransformer;
use crate::common::activations::Activation;
use crate::common::dropout::Dropout;
use crate::common::embeddings::get_shape_and_device_from_ids_embeddings_pair;
use crate::{albert::embeddings::AlbertEmbeddings, common::activations::TensorFunction};
use crate::{Config, RustBertError};
use serde::{Deserialize, Serialize};
use std::{borrow::Borrow, collections::HashMap};
use tch::nn::Module;
use tch::{nn, Kind, Tensor};
/// # ALBERT pretrained model weight resources
/// Namespace struct holding (cache alias, download URL) constants for model weights.
pub struct AlbertModelResources;
/// # ALBERT pretrained configuration resources
/// Namespace struct holding (cache alias, download URL) constants for model configs.
pub struct AlbertConfigResources;
/// # ALBERT pretrained vocabulary resources
/// Namespace struct holding (cache alias, download URL) constants for SentencePiece models.
pub struct AlbertVocabResources;
impl AlbertModelResources {
/// (local cache alias, remote URL) pair for the `albert-base-v2` weights.
pub const ALBERT_BASE_V2: (&'static str, &'static str) = (
"albert-base-v2/model",
"https://huggingface.co/albert-base-v2/resolve/main/rust_model.ot",
);
/// (local cache alias, remote URL) pair for the sentence-transformers
/// `paraphrase-albert-small-v2` weights.
pub const PARAPHRASE_ALBERT_SMALL_V2: (&'static str, &'static str) = (
"paraphrase-albert-small-v2/model",
"https://huggingface.co/sentence-transformers/paraphrase-albert-small-v2/resolve/main/rust_model.ot",
);
}
impl AlbertConfigResources {
/// (local cache alias, remote URL) pair for the `albert-base-v2` config.
pub const ALBERT_BASE_V2: (&'static str, &'static str) = (
"albert-base-v2/config",
"https://huggingface.co/albert-base-v2/resolve/main/config.json",
);
/// (local cache alias, remote URL) pair for the sentence-transformers
/// `paraphrase-albert-small-v2` config.
pub const PARAPHRASE_ALBERT_SMALL_V2: (&'static str, &'static str) = (
"paraphrase-albert-small-v2/config",
"https://huggingface.co/sentence-transformers/paraphrase-albert-small-v2/resolve/main/config.json",
);
}
impl AlbertVocabResources {
/// (local cache alias, remote URL) pair for the `albert-base-v2` SentencePiece model.
pub const ALBERT_BASE_V2: (&'static str, &'static str) = (
"albert-base-v2/spiece",
"https://huggingface.co/albert-base-v2/resolve/main/spiece.model",
);
/// (local cache alias, remote URL) pair for the sentence-transformers
/// `paraphrase-albert-small-v2` SentencePiece model.
pub const PARAPHRASE_ALBERT_SMALL_V2: (&'static str, &'static str) = (
"paraphrase-albert-small-v2/spiece",
"https://huggingface.co/sentence-transformers/paraphrase-albert-small-v2/resolve/main/spiece.model",
);
}
/// # ALBERT model configuration
/// Architecture hyper-parameters shared by the base model and all task heads
/// in this module. Serde-(de)serializable; typically loaded from a pretrained
/// `config.json` via the `Config` trait (TODO confirm against trait users).
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct AlbertConfig {
/// Activation function used by the encoder and the MLM head
pub hidden_act: Activation,
/// Dropout probability on attention weights (applied in the encoder)
pub attention_probs_dropout_prob: f64,
/// Dropout before the sequence-classification head; 0.1 when absent
pub classifier_dropout_prob: Option<f64>,
pub bos_token_id: i64,
pub eos_token_id: i64,
/// Size of the (factorized) token embedding vectors
pub embedding_size: i64,
/// Dropout used by the token- and multiple-choice heads
pub hidden_dropout_prob: f64,
/// Size of the encoder hidden states
pub hidden_size: i64,
// NOTE: f32 while the other floating fields are f64 — kept as-is for
// serialization compatibility.
pub initializer_range: f32,
pub inner_group_num: i64,
pub intermediate_size: i64,
/// Layer-norm epsilon; 1e-12 when absent
pub layer_norm_eps: Option<f64>,
pub max_position_embeddings: i64,
pub num_attention_heads: i64,
pub num_hidden_groups: i64,
pub num_hidden_layers: i64,
pub pad_token_id: i64,
pub type_vocab_size: i64,
pub vocab_size: i64,
pub output_attentions: Option<bool>,
pub output_hidden_states: Option<bool>,
pub is_decoder: Option<bool>,
/// Label id -> name map; its length defines `num_labels` for classification heads
pub id2label: Option<HashMap<i64, String>>,
/// Label name -> id map (inverse of `id2label`)
pub label2id: Option<HashMap<String, i64>>,
}
// Marker impl: opts AlbertConfig into the shared `Config` trait behavior
// (presumably the crate's config-file loading helpers — see `crate::Config`).
impl Config for AlbertConfig {}
impl Default for AlbertConfig {
    /// Default configuration; the values appear to mirror the large ALBERT
    /// release defaults (hidden size 4096, 64 attention heads) — identical to
    /// the previous literal, only regrouped by theme.
    fn default() -> Self {
        Self {
            // Vocabulary and embedding geometry
            vocab_size: 30000,
            embedding_size: 128,
            type_vocab_size: 2,
            max_position_embeddings: 512,
            // Encoder geometry: 12 layers sharing a single parameter group
            hidden_size: 4096,
            intermediate_size: 16384,
            num_attention_heads: 64,
            num_hidden_layers: 12,
            num_hidden_groups: 1,
            inner_group_num: 1,
            // Activation, regularization and initialization
            hidden_act: Activation::gelu_new,
            hidden_dropout_prob: 0.0,
            attention_probs_dropout_prob: 0.0,
            classifier_dropout_prob: Some(0.1),
            initializer_range: 0.02,
            layer_norm_eps: Some(1e-12),
            // Special token ids
            pad_token_id: 0,
            bos_token_id: 2,
            eos_token_id: 3,
            // Optional flags and label maps left unset
            output_attentions: None,
            output_hidden_states: None,
            is_decoder: None,
            id2label: None,
            label2id: None,
        }
    }
}
/// # ALBERT base model
/// Embeddings, repeated-layer transformer encoder and a tanh pooler over the
/// first token's hidden state.
pub struct AlbertModel {
embeddings: AlbertEmbeddings,
encoder: AlbertTransformer,
// Square linear layer (hidden_size x hidden_size) applied to the first token
pooler: nn::Linear,
// tanh, applied to the pooler output
pooler_activation: TensorFunction,
}
impl AlbertModel {
/// Builds the base ALBERT model.
///
/// Variables are created under `p`: "embeddings", "encoder" and "pooler".
/// The pooler is a square linear layer (hidden_size -> hidden_size) whose
/// output is passed through tanh in `forward_t`.
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embeddings = AlbertEmbeddings::new(p / "embeddings", config);
let encoder = AlbertTransformer::new(p / "encoder", config);
let pooler = nn::linear(
p / "pooler",
config.hidden_size,
config.hidden_size,
Default::default(),
);
let pooler_activation = Activation::tanh.get_function();
AlbertModel {
embeddings,
encoder,
pooler,
pooler_activation,
}
}
/// Forward pass through embeddings, encoder and pooler.
///
/// # Arguments
/// * `input_ids` - token id tensor; mutually exclusive with `input_embeds`
/// * `mask` - attention mask (1 = attend, 0 = masked); defaults to all ones
/// * `token_type_ids` / `position_ids` - optional id tensors forwarded to the embedding layer
/// * `input_embeds` - precomputed input embeddings; mutually exclusive with `input_ids`
/// * `train` - enables dropout in submodules
///
/// # Errors
/// Propagates errors from input shape/device resolution and from the
/// embedding layer (presumably when an invalid `input_ids`/`input_embeds`
/// combination is given — verify in `get_shape_and_device_from_ids_embeddings_pair`).
pub fn forward_t(
&self,
input_ids: Option<&Tensor>,
mask: Option<&Tensor>,
token_type_ids: Option<&Tensor>,
position_ids: Option<&Tensor>,
input_embeds: Option<&Tensor>,
train: bool,
) -> Result<AlbertOutput, RustBertError> {
let (input_shape, device) =
get_shape_and_device_from_ids_embeddings_pair(input_ids, input_embeds)?;
// No mask supplied: attend to every position. The owned tensor lives in
// `calc_mask` so the borrow taken below stays valid.
let calc_mask = if mask.is_none() {
Some(Tensor::ones(input_shape, (Kind::Int64, device)))
} else {
None
};
let mask = mask.unwrap_or_else(|| calc_mask.as_ref().unwrap());
let embedding_output = self.embeddings.forward_t(
input_ids,
token_type_ids,
position_ids,
input_embeds,
train,
)?;
// Broadcast the mask for attention and convert it to an additive mask:
// 0 where attended, -10000 where masked out.
let extended_attention_mask = mask.unsqueeze(1).unsqueeze(2);
let extended_attention_mask: Tensor =
((extended_attention_mask.ones_like() - extended_attention_mask) * -10000.0)
.to_kind(embedding_output.kind());
let transformer_output =
self.encoder
.forward_t(&embedding_output, Some(extended_attention_mask), train);
// Pool on the first token's hidden state: linear projection, then tanh.
let pooled_output = self
.pooler
.forward(&transformer_output.hidden_state.select(1, 0));
let pooled_output = (self.pooler_activation.get_fn())(&pooled_output);
Ok(AlbertOutput {
hidden_state: transformer_output.hidden_state,
pooled_output,
all_hidden_states: transformer_output.all_hidden_states,
all_attentions: transformer_output.all_attentions,
})
}
}
/// # Masked-language-model head
/// Projects hidden states down to the embedding size, applies the configured
/// activation and layer norm, then decodes to vocabulary logits.
pub struct AlbertMLMHead {
layer_norm: nn::LayerNorm,
// hidden_size -> embedding_size
dense: nn::Linear,
// embedding_size -> vocab_size
decoder: nn::Linear,
activation: TensorFunction,
}
impl AlbertMLMHead {
    /// Builds the masked-language-model prediction head.
    ///
    /// Variables are created under `p`: "LayerNorm", "dense" and "decoder".
    pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertMLMHead
    where
        P: Borrow<nn::Path<'p>>,
    {
        let path = p.borrow();

        // Layer norm over the (smaller) embedding dimension; epsilon falls
        // back to 1e-12 when the config omits it.
        let layer_norm = nn::layer_norm(
            path / "LayerNorm",
            vec![config.embedding_size],
            nn::LayerNormConfig {
                eps: config.layer_norm_eps.unwrap_or(1e-12),
                ..Default::default()
            },
        );

        // Projects hidden states back down to the embedding size...
        let dense = nn::linear(
            path / "dense",
            config.hidden_size,
            config.embedding_size,
            Default::default(),
        );

        // ...then up to vocabulary logits.
        let decoder = nn::linear(
            path / "decoder",
            config.embedding_size,
            config.vocab_size,
            Default::default(),
        );

        AlbertMLMHead {
            layer_norm,
            dense,
            decoder,
            activation: config.hidden_act.get_function(),
        }
    }

    /// Maps hidden states to vocabulary logits:
    /// dense -> activation -> layer norm -> decoder.
    pub fn forward(&self, hidden_states: &Tensor) -> Tensor {
        let projected = hidden_states.apply(&self.dense);
        let activated: Tensor = (self.activation.get_fn())(&projected);
        let normalized = activated.apply(&self.layer_norm);
        normalized.apply(&self.decoder)
    }
}
/// # ALBERT for masked language modeling
/// Base model plus an `AlbertMLMHead` producing per-token vocabulary logits.
pub struct AlbertForMaskedLM {
albert: AlbertModel,
predictions: AlbertMLMHead,
}
impl AlbertForMaskedLM {
    /// Builds an ALBERT model with a masked-language-modeling head.
    ///
    /// Variables are created under `p`: the base model under "albert" and the
    /// prediction head under "predictions".
    pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForMaskedLM
    where
        P: Borrow<nn::Path<'p>>,
    {
        let p = p.borrow();
        AlbertForMaskedLM {
            albert: AlbertModel::new(p / "albert", config),
            predictions: AlbertMLMHead::new(p / "predictions", config),
        }
    }

    /// Forward pass: base model hidden states -> MLM head -> per-token
    /// vocabulary logits.
    ///
    /// # Panics
    /// Panics when the base forward pass fails, e.g. when both or neither of
    /// `input_ids` and `input_embeds` are provided.
    pub fn forward_t(
        &self,
        input_ids: Option<&Tensor>,
        mask: Option<&Tensor>,
        token_type_ids: Option<&Tensor>,
        position_ids: Option<&Tensor>,
        input_embeds: Option<&Tensor>,
        train: bool,
    ) -> AlbertMaskedLMOutput {
        let base_model_output = self
            .albert
            .forward_t(
                input_ids,
                mask,
                token_type_ids,
                position_ids,
                input_embeds,
                train,
            )
            // The signature cannot surface the error; state the invariant
            // instead of panicking with no context.
            .expect("ALBERT base forward pass failed: check input_ids/input_embeds");
        let prediction_scores = self.predictions.forward(&base_model_output.hidden_state);
        AlbertMaskedLMOutput {
            prediction_scores,
            all_hidden_states: base_model_output.all_hidden_states,
            all_attentions: base_model_output.all_attentions,
        }
    }
}
/// # ALBERT for sequence classification
/// Base model plus dropout and a linear classifier over the pooled output.
pub struct AlbertForSequenceClassification {
albert: AlbertModel,
dropout: Dropout,
// hidden_size -> num_labels (length of config.id2label)
classifier: nn::Linear,
}
impl AlbertForSequenceClassification {
    /// Builds a sequence-classification head on top of the base ALBERT model.
    ///
    /// Variables are created under `p`: "albert" and "classifier". The number
    /// of output labels is the length of `config.id2label`.
    ///
    /// # Errors
    /// Returns an `InvalidConfigurationError` when `config.id2label` is unset.
    pub fn new<'p, P>(
        p: P,
        config: &AlbertConfig,
    ) -> Result<AlbertForSequenceClassification, RustBertError>
    where
        P: Borrow<nn::Path<'p>>,
    {
        let p = p.borrow();
        let albert = AlbertModel::new(p / "albert", config);
        // Classifier-specific dropout; defaults to 0.1 when unspecified.
        let dropout = Dropout::new(config.classifier_dropout_prob.unwrap_or(0.1));
        let num_labels = config
            .id2label
            .as_ref()
            .ok_or_else(|| {
                RustBertError::InvalidConfigurationError(
                    "num_labels not provided in configuration".to_string(),
                )
            })?
            .len() as i64;
        let classifier = nn::linear(
            p / "classifier",
            config.hidden_size,
            num_labels,
            Default::default(),
        );
        Ok(AlbertForSequenceClassification {
            albert,
            dropout,
            classifier,
        })
    }

    /// Forward pass: base model -> pooled output -> dropout -> linear
    /// classifier logits.
    ///
    /// # Panics
    /// Panics when the base forward pass fails, e.g. when both or neither of
    /// `input_ids` and `input_embeds` are provided.
    pub fn forward_t(
        &self,
        input_ids: Option<&Tensor>,
        mask: Option<&Tensor>,
        token_type_ids: Option<&Tensor>,
        position_ids: Option<&Tensor>,
        input_embeds: Option<&Tensor>,
        train: bool,
    ) -> AlbertSequenceClassificationOutput {
        let base_model_output = self
            .albert
            .forward_t(
                input_ids,
                mask,
                token_type_ids,
                position_ids,
                input_embeds,
                train,
            )
            // The signature cannot surface the error; state the invariant
            // instead of panicking with no context.
            .expect("ALBERT base forward pass failed: check input_ids/input_embeds");
        let logits = base_model_output
            .pooled_output
            .apply_t(&self.dropout, train)
            .apply(&self.classifier);
        AlbertSequenceClassificationOutput {
            logits,
            all_hidden_states: base_model_output.all_hidden_states,
            all_attentions: base_model_output.all_attentions,
        }
    }
}
/// # ALBERT for token classification
/// Base model plus dropout and a per-token linear classifier over the full
/// sequence of hidden states.
pub struct AlbertForTokenClassification {
albert: AlbertModel,
dropout: Dropout,
// hidden_size -> num_labels (length of config.id2label)
classifier: nn::Linear,
}
impl AlbertForTokenClassification {
    /// Builds a token-classification head on top of the base ALBERT model.
    ///
    /// Variables are created under `p`: "albert" and "classifier". The number
    /// of output labels is the length of `config.id2label`.
    ///
    /// # Errors
    /// Returns an `InvalidConfigurationError` when `config.id2label` is unset.
    pub fn new<'p, P>(
        p: P,
        config: &AlbertConfig,
    ) -> Result<AlbertForTokenClassification, RustBertError>
    where
        P: Borrow<nn::Path<'p>>,
    {
        let p = p.borrow();
        let albert = AlbertModel::new(p / "albert", config);
        let dropout = Dropout::new(config.hidden_dropout_prob);
        let num_labels = config
            .id2label
            .as_ref()
            .ok_or_else(|| {
                RustBertError::InvalidConfigurationError(
                    "num_labels not provided in configuration".to_string(),
                )
            })?
            .len() as i64;
        let classifier = nn::linear(
            p / "classifier",
            config.hidden_size,
            num_labels,
            Default::default(),
        );
        Ok(AlbertForTokenClassification {
            albert,
            dropout,
            classifier,
        })
    }

    /// Forward pass: base model hidden states -> dropout -> per-token linear
    /// classifier logits.
    ///
    /// # Panics
    /// Panics when the base forward pass fails, e.g. when both or neither of
    /// `input_ids` and `input_embeds` are provided.
    pub fn forward_t(
        &self,
        input_ids: Option<&Tensor>,
        mask: Option<&Tensor>,
        token_type_ids: Option<&Tensor>,
        position_ids: Option<&Tensor>,
        input_embeds: Option<&Tensor>,
        train: bool,
    ) -> AlbertTokenClassificationOutput {
        let base_model_output = self
            .albert
            .forward_t(
                input_ids,
                mask,
                token_type_ids,
                position_ids,
                input_embeds,
                train,
            )
            // The signature cannot surface the error; state the invariant
            // instead of panicking with no context.
            .expect("ALBERT base forward pass failed: check input_ids/input_embeds");
        // Unlike sequence classification, the classifier is applied to every
        // position of the final hidden states.
        let logits = base_model_output
            .hidden_state
            .apply_t(&self.dropout, train)
            .apply(&self.classifier);
        AlbertTokenClassificationOutput {
            logits,
            all_hidden_states: base_model_output.all_hidden_states,
            all_attentions: base_model_output.all_attentions,
        }
    }
}
/// # ALBERT for extractive question answering
/// Base model plus a 2-channel linear layer producing span start/end logits.
pub struct AlbertForQuestionAnswering {
albert: AlbertModel,
// hidden_size -> 2 (start and end channels)
qa_outputs: nn::Linear,
}
impl AlbertForQuestionAnswering {
    /// Builds an extractive question-answering head on top of the base model.
    ///
    /// Variables are created under `p`: "albert" and "qa_outputs".
    pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForQuestionAnswering
    where
        P: Borrow<nn::Path<'p>>,
    {
        let p = p.borrow();
        let albert = AlbertModel::new(p / "albert", config);
        // Two output channels: one for span starts, one for span ends.
        let num_labels = 2;
        let qa_outputs = nn::linear(
            p / "qa_outputs",
            config.hidden_size,
            num_labels,
            Default::default(),
        );
        AlbertForQuestionAnswering { albert, qa_outputs }
    }

    /// Forward pass producing per-position span start and end logits.
    ///
    /// # Panics
    /// Panics when the base forward pass fails, e.g. when both or neither of
    /// `input_ids` and `input_embeds` are provided.
    pub fn forward_t(
        &self,
        input_ids: Option<&Tensor>,
        mask: Option<&Tensor>,
        token_type_ids: Option<&Tensor>,
        position_ids: Option<&Tensor>,
        input_embeds: Option<&Tensor>,
        train: bool,
    ) -> AlbertQuestionAnsweringOutput {
        let base_model_output = self
            .albert
            .forward_t(
                input_ids,
                mask,
                token_type_ids,
                position_ids,
                input_embeds,
                train,
            )
            // The signature cannot surface the error; state the invariant
            // instead of panicking with no context.
            .expect("ALBERT base forward pass failed: check input_ids/input_embeds");
        // Project to 2 channels, split the last dimension into the start and
        // end tensors, then drop the now-singleton channel dimension.
        let logits = base_model_output
            .hidden_state
            .apply(&self.qa_outputs)
            .split(1, -1);
        let (start_logits, end_logits) = (&logits[0], &logits[1]);
        let start_logits = start_logits.squeeze_dim(-1);
        let end_logits = end_logits.squeeze_dim(-1);
        AlbertQuestionAnsweringOutput {
            start_logits,
            end_logits,
            all_hidden_states: base_model_output.all_hidden_states,
            all_attentions: base_model_output.all_attentions,
        }
    }
}
/// # ALBERT for multiple-choice classification
/// Base model plus dropout and a 1-output linear scorer; choices are scored
/// independently and regrouped to (batch, num_choices) in `forward_t`.
pub struct AlbertForMultipleChoice {
albert: AlbertModel,
dropout: Dropout,
// hidden_size -> 1 (a single score per flattened choice)
classifier: nn::Linear,
}
impl AlbertForMultipleChoice {
/// Builds a multiple-choice head on top of the base ALBERT model.
///
/// Variables are created under `p`: "albert" and "classifier". The classifier
/// has a single output: each choice gets one score, and scores are regrouped
/// per example in `forward_t`.
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForMultipleChoice
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let albert = AlbertModel::new(p / "albert", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = 1;
let classifier = nn::linear(
p / "classifier",
config.hidden_size,
num_labels,
Default::default(),
);
AlbertForMultipleChoice {
albert,
dropout,
classifier,
}
}
/// Forward pass for multiple-choice classification.
///
/// `input_ids` is expected as (batch, num_choices, seq) and is flattened to
/// (batch * num_choices, seq) before the base forward pass; the resulting
/// per-choice scores are reshaped back to (batch, num_choices).
///
/// # Errors
/// Returns a `ValueError` when both or neither of `input_ids` and
/// `input_embeds` are provided.
pub fn forward_t(
&self,
input_ids: Option<&Tensor>,
mask: Option<&Tensor>,
token_type_ids: Option<&Tensor>,
position_ids: Option<&Tensor>,
input_embeds: Option<&Tensor>,
train: bool,
) -> Result<AlbertSequenceClassificationOutput, RustBertError> {
// Exactly one of input_ids / input_embeds must be set; num_choices is read
// from dim 1 and the choice dimension is folded into the batch dimension.
let (input_ids, input_embeds, num_choices) = match &input_ids {
Some(input_value) => match &input_embeds {
Some(_) => {
return Err(RustBertError::ValueError(
"Only one of input ids or input embeddings may be set".into(),
));
}
None => (
Some(input_value.view((-1, *input_value.size().last().unwrap()))),
None,
input_value.size()[1],
),
},
None => match &input_embeds {
// NOTE(review): dim 1 is used both as num_choices and kept in the
// reshape here — verify the expected input_embeds layout against callers.
Some(embeds) => (
None,
Some(embeds.view((-1, embeds.size()[1], embeds.size()[2]))),
embeds.size()[1],
),
None => {
return Err(RustBertError::ValueError(
"At least one of input ids or input embeddings must be set".into(),
));
}
},
};
// Flatten the auxiliary tensors the same way (choice dim into batch dim).
let mask = mask.map(|tensor| tensor.view((-1, *tensor.size().last().unwrap())));
let token_type_ids =
token_type_ids.map(|tensor| tensor.view((-1, *tensor.size().last().unwrap())));
let position_ids =
position_ids.map(|tensor| tensor.view((-1, *tensor.size().last().unwrap())));
let base_model_output = self.albert.forward_t(
input_ids.as_ref(),
mask.as_ref(),
token_type_ids.as_ref(),
position_ids.as_ref(),
input_embeds.as_ref(),
train,
)?;
// One score per flattened choice; regroup scores per original example.
let logits = base_model_output
.pooled_output
.apply_t(&self.dropout, train)
.apply(&self.classifier)
.view((-1, num_choices));
Ok(AlbertSequenceClassificationOutput {
logits,
all_hidden_states: base_model_output.all_hidden_states,
all_attentions: base_model_output.all_attentions,
})
}
}
/// Alias of the base `AlbertModel` used for sentence-embedding tasks.
pub type AlbertForSentenceEmbeddings = AlbertModel;
/// Output container for the base ALBERT model forward pass.
pub struct AlbertOutput {
/// Final hidden states from the encoder
pub hidden_state: Tensor,
/// First-token hidden state after the linear pooler and tanh
pub pooled_output: Tensor,
/// Intermediate hidden states, when returned by the encoder
pub all_hidden_states: Option<Vec<Tensor>>,
/// Attention weights per layer, when returned by the encoder
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}
/// Output container for `AlbertForMaskedLM`.
pub struct AlbertMaskedLMOutput {
/// Per-token vocabulary logits from the MLM head
pub prediction_scores: Tensor,
/// Intermediate hidden states, when returned by the encoder
pub all_hidden_states: Option<Vec<Tensor>>,
/// Attention weights per layer, when returned by the encoder
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}
/// Output container for `AlbertForSequenceClassification` (and reused by
/// `AlbertForMultipleChoice`).
pub struct AlbertSequenceClassificationOutput {
/// Classification logits
pub logits: Tensor,
/// Intermediate hidden states, when returned by the encoder
pub all_hidden_states: Option<Vec<Tensor>>,
/// Attention weights per layer, when returned by the encoder
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}
/// Output container for `AlbertForTokenClassification`.
pub struct AlbertTokenClassificationOutput {
/// Per-token classification logits
pub logits: Tensor,
/// Intermediate hidden states, when returned by the encoder
pub all_hidden_states: Option<Vec<Tensor>>,
/// Attention weights per layer, when returned by the encoder
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}
/// Output container for `AlbertForQuestionAnswering`.
pub struct AlbertQuestionAnsweringOutput {
/// Per-position span-start logits
pub start_logits: Tensor,
/// Per-position span-end logits
pub end_logits: Tensor,
/// Intermediate hidden states, when returned by the encoder
pub all_hidden_states: Option<Vec<Tensor>>,
/// Attention weights per layer, when returned by the encoder
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}