Module rust_bert::pipelines::token_classification

Token classification pipeline (Named Entity Recognition, Part-of-Speech tagging)

A more generic token classification pipeline that works with multiple models (BERT, RoBERTa).

use rust_bert::pipelines::token_classification::{TokenClassificationModel,TokenClassificationConfig};
use rust_bert::resources::{Resource,RemoteResource};
use rust_bert::bert::{BertModelResources, BertVocabResources, BertConfigResources};
use rust_bert::pipelines::common::ModelType;

//Load a configuration
use rust_bert::pipelines::token_classification::LabelAggregationOption;
let config = TokenClassificationConfig::new(ModelType::Bert,
   Resource::Remote(RemoteResource::from_pretrained(BertModelResources::BERT_NER)),
   Resource::Remote(RemoteResource::from_pretrained(BertVocabResources::BERT_NER)),
   Resource::Remote(RemoteResource::from_pretrained(BertConfigResources::BERT_NER)),
   None, //merges resource only relevant with ModelType::Roberta
   false, //lowercase
   LabelAggregationOption::Mode
);

//Create the model
let token_classification_model = TokenClassificationModel::new(config)?;

let input = [
    "My name is Amélie. I live in Paris.",
    "Paris is a city in France."
];
let output = token_classification_model.predict(&input, true, true); // consolidate_sub_tokens = true, return_special = true (special tokens such as [CLS] are included in the output)

Output:

use rust_tokenizers::preprocessing::tokenizer::base_tokenizer::Mask::Special;
use rust_tokenizers::preprocessing::tokenizer::base_tokenizer::{Offset, Mask};
[
   Token { text: String::from("[CLS]"), score: 0.9995001554489136, label: String::from("O"), label_index: 0, sentence: 0, index: 0, word_index: 0, offset: None, mask: Special },
   Token { text: String::from("My"), score: 0.9980450868606567, label: String::from("O"), label_index: 0, sentence: 0, index: 1, word_index: 1, offset: Some(Offset { begin: 0, end: 2 }), mask: Mask::None },
   Token { text: String::from("name"), score: 0.9995062351226807, label: String::from("O"), label_index: 0, sentence: 0, index: 2, word_index: 2, offset: Some(Offset { begin: 3, end: 7 }), mask: Mask::None },
   Token { text: String::from("is"), score: 0.9997343420982361, label: String::from("O"), label_index: 0, sentence: 0, index: 3, word_index: 3, offset: Some(Offset { begin: 8, end: 10 }), mask: Mask::None },
   Token { text: String::from("Amélie"), score: 0.9913727683112525, label: String::from("I-PER"), label_index: 4, sentence: 0, index: 4, word_index: 4, offset: Some(Offset { begin: 11, end: 17 }), mask: Mask::None }
   // ...
]

Structs

Token

Token generated by a TokenClassificationModel

TokenClassificationConfig

Configuration for TokenClassificationModel

TokenClassificationModel

TokenClassificationModel for Named Entity Recognition or Part-of-Speech tagging

Enums

LabelAggregationOption

Enum defining the label aggregation method for sub-tokens

TokenClassificationOption

Abstraction that holds one particular token sequence classifier model, for any of the supported models