[−][src]Trait rust_tokenizers::preprocessing::vocab::base_vocab::Vocab
Required methods
fn unknown_value() -> &'static str
Associated function returning the unknown value
fn get_unknown_value(&self) -> &'static str
Returns the unknown value on an instance
fn values(&self) -> &HashMap<String, i64>
Return the map of token strings to IDs
fn indices(&self) -> &HashMap<i64, String>
Return the map of token IDs to strings
fn special_values(&self) -> &HashMap<String, i64>
Return the map of token strings to IDs for special values
fn special_indices(&self) -> &HashMap<i64, String>
Return the map of token IDs to strings for special values
fn from_file(path: &str) -> Result<Self, TokenizerError> where
Self: Sized,
Read a vocabulary file from file
fn token_to_id(&self, token: &str) -> i64
fn id_to_token(&self, id: &i64) -> String
Provided methods
fn read_vocab_file(path: &str) -> Result<HashMap<String, i64>, TokenizerError>
Read a Bert-style vocab.txt file (single column, one token per line)
fn _token_to_id(
&self,
token: &str,
values: &HashMap<String, i64>,
special_values: &HashMap<String, i64>,
unknown_value: &str
) -> i64
fn _id_to_token(
&self,
id: &i64,
indices: &HashMap<i64, String>,
special_indices: &HashMap<i64, String>,
unknown_value: &str
) -> String
fn _register_as_special_value(
token: &str,
values: &HashMap<String, i64>,
special_values: &mut HashMap<String, i64>
) -> Result<(), TokenizerError>
fn convert_tokens_to_ids(&self, tokens: Vec<&str>) -> Vec<i64>
Implementors
impl Vocab for AlbertVocab
[src]
fn unknown_value() -> &'static str
[src]
fn get_unknown_value(&self) -> &'static str
[src]
fn values(&self) -> &HashMap<String, i64>
[src]
fn indices(&self) -> &HashMap<i64, String>
[src]
fn special_values(&self) -> &HashMap<String, i64>
[src]
fn special_indices(&self) -> &HashMap<i64, String>
[src]
fn from_file(path: &str) -> Result<AlbertVocab, TokenizerError>
[src]
fn token_to_id(&self, token: &str) -> i64
[src]
fn id_to_token(&self, id: &i64) -> String
[src]
impl Vocab for BaseVocab
[src]
fn unknown_value() -> &'static str
[src]
fn get_unknown_value(&self) -> &'static str
[src]
fn values(&self) -> &HashMap<String, i64>
[src]
fn indices(&self) -> &HashMap<i64, String>
[src]
fn special_values(&self) -> &HashMap<String, i64>
[src]
fn special_indices(&self) -> &HashMap<i64, String>
[src]
fn from_file(path: &str) -> Result<BaseVocab, TokenizerError>
[src]
fn token_to_id(&self, token: &str) -> i64
[src]
fn id_to_token(&self, id: &i64) -> String
[src]
impl Vocab for BertVocab
[src]
fn unknown_value() -> &'static str
[src]
fn get_unknown_value(&self) -> &'static str
[src]
fn values(&self) -> &HashMap<String, i64>
[src]
fn indices(&self) -> &HashMap<i64, String>
[src]
fn special_values(&self) -> &HashMap<String, i64>
[src]
fn special_indices(&self) -> &HashMap<i64, String>
[src]
fn from_file(path: &str) -> Result<BertVocab, TokenizerError>
[src]
fn token_to_id(&self, token: &str) -> i64
[src]
fn id_to_token(&self, id: &i64) -> String
[src]
impl Vocab for Gpt2Vocab
[src]
fn unknown_value() -> &'static str
[src]
fn get_unknown_value(&self) -> &'static str
[src]
fn values(&self) -> &HashMap<String, i64>
[src]
fn indices(&self) -> &HashMap<i64, String>
[src]
fn special_values(&self) -> &HashMap<String, i64>
[src]
fn special_indices(&self) -> &HashMap<i64, String>
[src]
fn from_file(path: &str) -> Result<Gpt2Vocab, TokenizerError>
[src]
fn token_to_id(&self, token: &str) -> i64
[src]
fn id_to_token(&self, id: &i64) -> String
[src]
impl Vocab for MarianVocab
[src]
fn unknown_value() -> &'static str
[src]
fn get_unknown_value(&self) -> &'static str
[src]
fn values(&self) -> &HashMap<String, i64>
[src]
fn indices(&self) -> &HashMap<i64, String>
[src]
fn special_values(&self) -> &HashMap<String, i64>
[src]
fn special_indices(&self) -> &HashMap<i64, String>
[src]
fn from_file(path: &str) -> Result<MarianVocab, TokenizerError>
[src]
fn token_to_id(&self, token: &str) -> i64
[src]
fn id_to_token(&self, id: &i64) -> String
[src]
impl Vocab for OpenAiGptVocab
[src]
fn unknown_value() -> &'static str
[src]
fn get_unknown_value(&self) -> &'static str
[src]
fn values(&self) -> &HashMap<String, i64>
[src]
fn indices(&self) -> &HashMap<i64, String>
[src]
fn special_values(&self) -> &HashMap<String, i64>
[src]
fn special_indices(&self) -> &HashMap<i64, String>
[src]
fn from_file(path: &str) -> Result<OpenAiGptVocab, TokenizerError>
[src]
fn token_to_id(&self, token: &str) -> i64
[src]
fn id_to_token(&self, id: &i64) -> String
[src]
impl Vocab for RobertaVocab
[src]
fn unknown_value() -> &'static str
[src]
fn get_unknown_value(&self) -> &'static str
[src]
fn values(&self) -> &HashMap<String, i64>
[src]
fn indices(&self) -> &HashMap<i64, String>
[src]
fn special_values(&self) -> &HashMap<String, i64>
[src]
fn special_indices(&self) -> &HashMap<i64, String>
[src]
fn from_file(path: &str) -> Result<RobertaVocab, TokenizerError>
[src]
Read a Roberta-style vocab.json file