use crate::albert::AlbertForSequenceClassification;
use crate::bart::{
BartConfigResources, BartForSequenceClassification, BartMergesResources, BartModelResources,
BartVocabResources,
};
use crate::bert::BertForSequenceClassification;
use crate::distilbert::DistilBertModelClassifier;
use crate::longformer::LongformerForSequenceClassification;
use crate::mobilebert::MobileBertForSequenceClassification;
use crate::pipelines::common::{ConfigOption, ModelType, TokenizerOption};
use crate::pipelines::sequence_classification::Label;
use crate::resources::{RemoteResource, Resource};
use crate::roberta::RobertaForSequenceClassification;
use crate::xlnet::XLNetForSequenceClassification;
use crate::RustBertError;
use rust_tokenizers::tokenizer::TruncationStrategy;
use rust_tokenizers::TokenizedInput;
use std::borrow::Borrow;
use std::ops::Deref;
use tch::kind::Kind::{Bool, Float};
use tch::nn::VarStore;
use tch::{nn, no_grad, Device, Tensor};
/// Configuration for a zero-shot classification model.
///
/// Groups the resources and tokenizer options read by
/// `ZeroShotClassificationModel::new` when loading the tokenizer, the model
/// configuration and the model weights.
pub struct ZeroShotClassificationConfig {
/// Model type; selects the tokenizer, the configuration format and the model architecture
pub model_type: ModelType,
/// Resource resolved to a local path from which the model weights are loaded
pub model_resource: Resource,
/// Resource resolved to a local path from which the model configuration is read
pub config_resource: Resource,
/// Resource resolved to a local path holding the tokenizer vocabulary
pub vocab_resource: Resource,
/// Optional resource resolved to a local path holding the tokenizer merges file
pub merges_resource: Option<Resource>,
/// Flag forwarded to the tokenizer (presumably lower-cases the input text -- confirm against the tokenizer)
pub lower_case: bool,
/// Optional flag forwarded to the tokenizer (presumably controls accent stripping -- confirm against the tokenizer)
pub strip_accents: Option<bool>,
/// Optional flag forwarded to the tokenizer (presumably controls prefix-space insertion -- confirm against the tokenizer)
pub add_prefix_space: Option<bool>,
/// Device on which the variable store holding the model weights is created
pub device: Device,
}
impl ZeroShotClassificationConfig {
    /// Builds a configuration from explicitly provided resources and tokenizer options.
    ///
    /// # Arguments
    ///
    /// * `model_type` - type of model to instantiate
    /// * `model_resource` - resource pointing to the model weights
    /// * `config_resource` - resource pointing to the model configuration
    /// * `vocab_resource` - resource pointing to the tokenizer vocabulary
    /// * `merges_resource` - optional resource pointing to the tokenizer merges file
    /// * `lower_case` - flag stored as-is and later forwarded to the tokenizer
    /// * `strip_accents` - anything convertible into `Option<bool>`, forwarded to the tokenizer
    /// * `add_prefix_space` - anything convertible into `Option<bool>`, forwarded to the tokenizer
    ///
    /// The `device` field is always initialised with `Device::cuda_if_available()`.
    pub fn new(
        model_type: ModelType,
        model_resource: Resource,
        config_resource: Resource,
        vocab_resource: Resource,
        merges_resource: Option<Resource>,
        lower_case: bool,
        strip_accents: impl Into<Option<bool>>,
        add_prefix_space: impl Into<Option<bool>>,
    ) -> ZeroShotClassificationConfig {
        // Resolve the flexible `Into<Option<bool>>` arguments up front so the
        // struct literal below can use field-init shorthand throughout.
        let strip_accents: Option<bool> = strip_accents.into();
        let add_prefix_space: Option<bool> = add_prefix_space.into();
        let device = Device::cuda_if_available();

        Self {
            model_type,
            model_resource,
            config_resource,
            vocab_resource,
            merges_resource,
            lower_case,
            strip_accents,
            add_prefix_space,
            device,
        }
    }
}
impl Default for ZeroShotClassificationConfig {
    /// Returns a configuration pointing at the remote `BART_MNLI` resources
    /// (weights, configuration, vocabulary and merges), with a BART model type,
    /// no lower-casing, unset tokenizer options and `Device::cuda_if_available()`.
    fn default() -> ZeroShotClassificationConfig {
        let model_resource = Resource::Remote(RemoteResource::from_pretrained(
            BartModelResources::BART_MNLI,
        ));
        let config_resource = Resource::Remote(RemoteResource::from_pretrained(
            BartConfigResources::BART_MNLI,
        ));
        let vocab_resource = Resource::Remote(RemoteResource::from_pretrained(
            BartVocabResources::BART_MNLI,
        ));
        let merges_resource = Resource::Remote(RemoteResource::from_pretrained(
            BartMergesResources::BART_MNLI,
        ));

        Self {
            model_type: ModelType::Bart,
            model_resource,
            config_resource,
            vocab_resource,
            merges_resource: Some(merges_resource),
            lower_case: false,
            strip_accents: None,
            add_prefix_space: None,
            device: Device::cuda_if_available(),
        }
    }
}
/// Wrapper over the sequence-classification models usable for zero-shot classification.
///
/// Each variant owns one concrete model; `forward_t` dispatches to it.
pub enum ZeroShotClassificationOption {
/// Zero-shot classifier based on a BART sequence-classification model
Bart(BartForSequenceClassification),
/// Zero-shot classifier based on a BERT sequence-classification model
Bert(BertForSequenceClassification),
/// Zero-shot classifier based on a DistilBERT classifier
DistilBert(DistilBertModelClassifier),
/// Zero-shot classifier based on a MobileBERT sequence-classification model
MobileBert(MobileBertForSequenceClassification),
/// Zero-shot classifier based on a RoBERTa sequence-classification model
Roberta(RobertaForSequenceClassification),
/// Zero-shot classifier for XLM-RoBERTa; wraps the same model type as `Roberta`
XLMRoberta(RobertaForSequenceClassification),
/// Zero-shot classifier based on an ALBERT sequence-classification model
Albert(AlbertForSequenceClassification),
/// Zero-shot classifier based on an XLNet sequence-classification model
XLNet(XLNetForSequenceClassification),
/// Zero-shot classifier based on a Longformer sequence-classification model
Longformer(LongformerForSequenceClassification),
}
impl ZeroShotClassificationOption {
/// Instantiates the zero-shot classification model matching `model_type`.
///
/// # Arguments
///
/// * `model_type` - type of model to build
/// * `p` - variable store path under which the model variables are created
/// * `config` - model configuration; its variant must match `model_type`
///   (Roberta and XLMRoberta both expect a `ConfigOption::Bert`)
///
/// # Errors
///
/// Returns `RustBertError::InvalidConfigurationError` when the configuration
/// variant does not match `model_type`, or when `model_type` is not supported
/// for zero-shot classification. Errors raised while building the XLNet model
/// are propagated to the caller instead of panicking.
pub fn new<'p, P>(
    model_type: ModelType,
    p: P,
    config: &ConfigOption,
) -> Result<Self, RustBertError>
where
    P: Borrow<nn::Path<'p>>,
{
    match model_type {
        ModelType::Bart => {
            if let ConfigOption::Bart(config) = config {
                Ok(ZeroShotClassificationOption::Bart(
                    BartForSequenceClassification::new(p, config),
                ))
            } else {
                Err(RustBertError::InvalidConfigurationError(
                    "You can only supply a BartConfig for Bart!".to_string(),
                ))
            }
        }
        ModelType::Bert => {
            if let ConfigOption::Bert(config) = config {
                Ok(ZeroShotClassificationOption::Bert(
                    BertForSequenceClassification::new(p, config),
                ))
            } else {
                Err(RustBertError::InvalidConfigurationError(
                    "You can only supply a BertConfig for Bert!".to_string(),
                ))
            }
        }
        ModelType::DistilBert => {
            if let ConfigOption::DistilBert(config) = config {
                Ok(ZeroShotClassificationOption::DistilBert(
                    DistilBertModelClassifier::new(p, config),
                ))
            } else {
                Err(RustBertError::InvalidConfigurationError(
                    "You can only supply a DistilBertConfig for DistilBert!".to_string(),
                ))
            }
        }
        ModelType::MobileBert => {
            if let ConfigOption::MobileBert(config) = config {
                Ok(ZeroShotClassificationOption::MobileBert(
                    MobileBertForSequenceClassification::new(p, config),
                ))
            } else {
                Err(RustBertError::InvalidConfigurationError(
                    "You can only supply a MobileBertConfig for MobileBert!".to_string(),
                ))
            }
        }
        // Roberta-family models are configured through a BertConfig.
        ModelType::Roberta => {
            if let ConfigOption::Bert(config) = config {
                Ok(ZeroShotClassificationOption::Roberta(
                    RobertaForSequenceClassification::new(p, config),
                ))
            } else {
                Err(RustBertError::InvalidConfigurationError(
                    "You can only supply a BertConfig for Roberta!".to_string(),
                ))
            }
        }
        ModelType::XLMRoberta => {
            if let ConfigOption::Bert(config) = config {
                Ok(ZeroShotClassificationOption::XLMRoberta(
                    RobertaForSequenceClassification::new(p, config),
                ))
            } else {
                Err(RustBertError::InvalidConfigurationError(
                    "You can only supply a BertConfig for XLMRoberta!".to_string(),
                ))
            }
        }
        ModelType::Albert => {
            if let ConfigOption::Albert(config) = config {
                Ok(ZeroShotClassificationOption::Albert(
                    AlbertForSequenceClassification::new(p, config),
                ))
            } else {
                Err(RustBertError::InvalidConfigurationError(
                    "You can only supply an AlbertConfig for Albert!".to_string(),
                ))
            }
        }
        ModelType::XLNet => {
            if let ConfigOption::XLNet(config) = config {
                // The XLNet constructor is fallible: propagate its error
                // rather than panicking inside a function returning `Result`.
                Ok(ZeroShotClassificationOption::XLNet(
                    XLNetForSequenceClassification::new(p, config)?,
                ))
            } else {
                Err(RustBertError::InvalidConfigurationError(
                    "You can only supply a XLNetConfig for XLNet!".to_string(),
                ))
            }
        }
        ModelType::Longformer => {
            if let ConfigOption::Longformer(config) = config {
                Ok(ZeroShotClassificationOption::Longformer(
                    LongformerForSequenceClassification::new(p, config),
                ))
            } else {
                Err(RustBertError::InvalidConfigurationError(
                    "You can only supply a LongformerConfig for Longformer!".to_string(),
                ))
            }
        }
        _ => Err(RustBertError::InvalidConfigurationError(format!(
            "Zero shot classification not implemented for {:?}!",
            model_type
        ))),
    }
}
/// Returns the `ModelType` corresponding to the wrapped model.
///
/// Each variant maps to the model type of the same name, so a model built
/// with `ModelType::XLMRoberta` reports `ModelType::XLMRoberta` (and not
/// `ModelType::Roberta`, even though both wrap the same model struct).
pub fn model_type(&self) -> ModelType {
    match self {
        Self::Bart(_) => ModelType::Bart,
        Self::Bert(_) => ModelType::Bert,
        Self::Roberta(_) => ModelType::Roberta,
        Self::XLMRoberta(_) => ModelType::XLMRoberta,
        Self::DistilBert(_) => ModelType::DistilBert,
        Self::MobileBert(_) => ModelType::MobileBert,
        Self::Albert(_) => ModelType::Albert,
        Self::XLNet(_) => ModelType::XLNet,
        Self::Longformer(_) => ModelType::Longformer,
    }
}
/// Runs a forward pass through the wrapped model and returns its output tensor.
///
/// The arguments are forwarded to the underlying model's `forward_t`; inputs a
/// given architecture does not accept in this wrapper are ignored (for example
/// BART only receives `input_ids` and `mask`).
///
/// # Arguments
///
/// * `input_ids` - optional input token ids (mandatory for BART)
/// * `mask` - optional attention mask
/// * `token_type_ids` - optional token type ids
/// * `position_ids` - optional position ids
/// * `input_embeds` - optional pre-computed input embeddings
/// * `train` - forwarded to the model (presumably toggles training-only layers such as dropout)
///
/// # Returns
///
/// The `logits` field of the model output (the `decoder_output` field for BART).
///
/// # Panics
///
/// Panics if `input_ids` is `None` for a BART model, or if the DistilBERT,
/// MobileBERT or Longformer forward pass returns an error.
pub fn forward_t(
    &self,
    input_ids: Option<&Tensor>,
    mask: Option<&Tensor>,
    token_type_ids: Option<&Tensor>,
    position_ids: Option<&Tensor>,
    input_embeds: Option<&Tensor>,
    train: bool,
) -> Tensor {
    match self {
        Self::Bart(bart) => {
            let input_ids = input_ids.expect("`input_ids` must be provided for BART models");
            let model_output = bart.forward_t(input_ids, mask, None, None, None, train);
            model_output.decoder_output
        }
        Self::Bert(bert) => {
            let model_output = bert.forward_t(
                input_ids,
                mask,
                token_type_ids,
                position_ids,
                input_embeds,
                train,
            );
            model_output.logits
        }
        Self::DistilBert(distilbert) => {
            let model_output = distilbert
                .forward_t(input_ids, mask, input_embeds, train)
                .expect("Error in distilbert forward_t");
            model_output.logits
        }
        Self::MobileBert(mobilebert) => {
            let model_output = mobilebert
                .forward_t(input_ids, None, None, input_embeds, mask, train)
                .expect("Error in mobilebert forward_t");
            model_output.logits
        }
        // Both variants wrap the same RoBERTa model type.
        Self::Roberta(roberta) | Self::XLMRoberta(roberta) => {
            let model_output = roberta.forward_t(
                input_ids,
                mask,
                token_type_ids,
                position_ids,
                input_embeds,
                train,
            );
            model_output.logits
        }
        Self::Albert(albert) => {
            let model_output = albert.forward_t(
                input_ids,
                mask,
                token_type_ids,
                position_ids,
                input_embeds,
                train,
            );
            model_output.logits
        }
        Self::XLNet(xlnet) => {
            let model_output = xlnet.forward_t(
                input_ids,
                mask,
                None,
                None,
                None,
                token_type_ids,
                input_embeds,
                train,
            );
            model_output.logits
        }
        Self::Longformer(longformer) => {
            let model_output = longformer
                .forward_t(
                    input_ids,
                    mask,
                    None,
                    token_type_ids,
                    position_ids,
                    input_embeds,
                    train,
                )
                .expect("Error in Longformer forward pass.");
            model_output.logits
        }
    }
}
}
/// Zero-shot classification pipeline: a tokenizer, a classification model and
/// the variable store holding the model weights.
pub struct ZeroShotClassificationModel {
/// Tokenizer used to encode (input, label sentence) pairs
tokenizer: TokenizerOption,
/// Sequence-classification model producing the logits for each pair
zero_shot_classifier: ZeroShotClassificationOption,
/// Variable store owning the model weights; also provides the device input tensors are moved to
var_store: VarStore,
}
impl ZeroShotClassificationModel {
pub fn new(
config: ZeroShotClassificationConfig,
) -> Result<ZeroShotClassificationModel, RustBertError> {
let config_path = config.config_resource.get_local_path()?;
let vocab_path = config.vocab_resource.get_local_path()?;
let weights_path = config.model_resource.get_local_path()?;
let merges_path = if let Some(merges_resource) = &config.merges_resource {
Some(merges_resource.get_local_path()?)
} else {
None
};
let device = config.device;
let tokenizer = TokenizerOption::from_file(
config.model_type,
vocab_path.to_str().unwrap(),
merges_path.as_deref().map(|path| path.to_str().unwrap()),
config.lower_case,
config.strip_accents,
config.add_prefix_space,
)?;
let mut var_store = VarStore::new(device);
let model_config = ConfigOption::from_file(config.model_type, config_path);
let zero_shot_classifier =
ZeroShotClassificationOption::new(config.model_type, &var_store.root(), &model_config)?;
var_store.load(weights_path)?;
Ok(ZeroShotClassificationModel {
tokenizer,
zero_shot_classifier,
var_store,
})
}
/// Encodes every (input, label sentence) pair into padded token ids and a padding mask.
///
/// Each label is turned into a sentence, either with `template` or with the
/// default `"This example is about {}."` pattern. Every input is then paired
/// with every label sentence (all labels of the first input, then all labels of
/// the second input, and so on), the pairs are tokenized with a `LongestFirst`
/// truncation to `max_len`, and the encodings are right-padded with the
/// tokenizer pad id to the length of the longest encoding.
///
/// # Returns
///
/// A tuple `(token_ids, mask)`: the stacked padded token ids moved to the
/// variable store device, and a boolean tensor that is `true` wherever the
/// token id differs from the pad id.
///
/// # Panics
///
/// Panics if no pair is produced (no input or no label), or if the tokenizer
/// does not define a pad id.
fn prepare_for_model<'a, S, T>(
    &self,
    inputs: S,
    labels: T,
    template: Option<Box<dyn Fn(&str) -> String>>,
    max_len: usize,
) -> (Tensor, Tensor)
where
    S: AsRef<[&'a str]>,
    T: AsRef<[&'a str]>,
{
    let label_sentences: Vec<String> = labels
        .as_ref()
        .iter()
        .map(|label| match &template {
            Some(function) => function(label),
            None => format!("This example is about {}.", label),
        })
        .collect();

    // Cartesian product: inputs vary slowest, label sentences vary fastest.
    let text_pair_list: Vec<(&str, &str)> = inputs
        .as_ref()
        .iter()
        .flat_map(|input| {
            label_sentences
                .iter()
                .map(move |label_sentence| (input.deref(), label_sentence.as_str()))
        })
        .collect();

    let encodings: Vec<TokenizedInput> = self.tokenizer.encode_pair_list(
        text_pair_list.as_ref(),
        max_len,
        &TruncationStrategy::LongestFirst,
        0,
    );

    let longest = encodings
        .iter()
        .map(|encoding| encoding.token_ids.len())
        .max()
        .unwrap();
    let pad_id = self
        .tokenizer
        .get_pad_id()
        .expect("The Tokenizer used for zero shot classification should contain a PAD id");

    let padded_rows: Vec<tch::Tensor> = encodings
        .iter()
        .map(|encoding| {
            let mut token_ids = encoding.token_ids.clone();
            // No encoding is longer than `longest`, so this only ever appends pad ids.
            token_ids.resize(longest, pad_id);
            Tensor::of_slice(&token_ids)
        })
        .collect();

    let token_ids = Tensor::stack(padded_rows.as_slice(), 0).to(self.var_store.device());
    let mask = token_ids.ne(pad_id).to_kind(Bool);
    (token_ids, mask)
}
/// Assigns a single label, chosen among `labels`, to each input.
///
/// The model output is reshaped to `(number of inputs, number of labels, -1)`.
/// A softmax is applied across the labels dimension and the last entry of the
/// final dimension is kept as the score of each label (presumably the
/// entailment logit of an NLI classification head -- confirm against the model
/// used). The highest-scoring label is returned for every input.
///
/// # Arguments
///
/// * `inputs` - texts to classify
/// * `labels` - candidate labels
/// * `template` - optional closure turning a label into a sentence; a default pattern is used when `None`
/// * `max_length` - maximum length passed to the tokenizer for truncation
///
/// # Returns
///
/// One `Label` per input, in input order, holding the selected label text,
/// its score, its index in `labels` and the index of the input.
///
/// # Panics
///
/// Panics under the same conditions as the input preparation and the model
/// forward pass (for instance when `inputs` or `labels` is empty).
pub fn predict<'a, S, T>(
    &self,
    inputs: S,
    labels: T,
    template: Option<Box<dyn Fn(&str) -> String>>,
    max_length: usize,
) -> Vec<Label>
where
    S: AsRef<[&'a str]>,
    T: AsRef<[&'a str]>,
{
    let candidate_labels = labels.as_ref();
    let num_inputs = inputs.as_ref().len();
    let num_labels = candidate_labels.len();

    let (input_tensor, mask) =
        self.prepare_for_model(inputs.as_ref(), candidate_labels, template, max_length);

    let logits = no_grad(|| {
        let flat_logits = self.zero_shot_classifier.forward_t(
            Some(&input_tensor),
            Some(&mask),
            None,
            None,
            None,
            false,
        );
        flat_logits.view((num_inputs as i64, num_labels as i64, -1i64))
    });

    // Normalise across labels, then keep the last entry of the final dimension.
    let label_scores = logits.softmax(1, Float).select(-1, -1);
    let best_indices = label_scores.as_ref().argmax(-1, true).squeeze_dim(1);
    let best_scores = label_scores
        .gather(1, &best_indices.unsqueeze(-1), false)
        .squeeze_dim(1);

    let best_indices: Vec<i64> = best_indices.iter::<i64>().unwrap().collect();
    let best_scores: Vec<f64> = best_scores.iter::<f64>().unwrap().collect();

    best_indices
        .into_iter()
        .zip(best_scores)
        .enumerate()
        .map(|(sentence, (id, score))| Label {
            text: candidate_labels[id as usize].to_string(),
            score,
            id,
            sentence,
        })
        .collect()
}
/// Scores every candidate label independently for each input.
///
/// The model output is reshaped to `(number of inputs, number of labels, -1)`.
/// For every (input, label) pair, entries 0 and 2 of the final dimension are
/// kept, a softmax is applied over these two values and the second one is
/// returned as the label score (presumably contradiction versus entailment of
/// an NLI classification head -- confirm against the model used).
///
/// # Arguments
///
/// * `inputs` - texts to classify
/// * `labels` - candidate labels
/// * `template` - optional closure turning a label into a sentence; a default pattern is used when `None`
/// * `max_length` - maximum length passed to the tokenizer for truncation
///
/// # Returns
///
/// One vector per input, in input order, each holding one `Label` per
/// candidate label in the order of `labels`.
///
/// # Panics
///
/// Panics under the same conditions as the input preparation and the model
/// forward pass (for instance when `inputs` or `labels` is empty).
pub fn predict_multilabel<'a, S, T>(
    &self,
    inputs: S,
    labels: T,
    template: Option<Box<dyn Fn(&str) -> String>>,
    max_length: usize,
) -> Vec<Vec<Label>>
where
    S: AsRef<[&'a str]>,
    T: AsRef<[&'a str]>,
{
    let candidate_labels = labels.as_ref();
    let num_inputs = inputs.as_ref().len();
    let num_labels = candidate_labels.len();

    let (input_tensor, mask) =
        self.prepare_for_model(inputs.as_ref(), candidate_labels, template, max_length);

    let logits = no_grad(|| {
        let flat_logits = self.zero_shot_classifier.forward_t(
            Some(&input_tensor),
            Some(&mask),
            None,
            None,
            None,
            false,
        );
        flat_logits.view((num_inputs as i64, num_labels as i64, -1i64))
    });

    // Keep entries 0 and 2 of the last dimension and normalise over that pair only.
    let label_scores = logits
        .slice(-1, 0, 3, 2)
        .softmax(-1, Float)
        .select(-1, -1);

    (0..num_inputs)
        .map(|sentence| {
            let sentence_scores = label_scores.select(0, sentence as i64);
            let sentence_labels: Vec<Label> = sentence_scores
                .iter::<f64>()
                .unwrap()
                .enumerate()
                .map(|(label_index, score)| Label {
                    text: candidate_labels[label_index].to_string(),
                    score,
                    id: label_index as i64,
                    sentence,
                })
                .collect();
            sentence_labels
        })
        .collect()
}
}
#[cfg(test)]
mod test {
    use super::*;

    /// Compile-time check that the result of building a model is `Send`:
    /// boxing it as `dyn Send` only type-checks if the value is `Send`.
    /// Ignored by default.
    #[test]
    #[ignore]
    fn test() {
        let default_config = ZeroShotClassificationConfig::default();
        let model_result = ZeroShotClassificationModel::new(default_config);
        let _: Box<dyn Send> = Box::new(model_result);
    }
}