pub struct Tokenizer { /* private fields */ }
Implementations§
impl Tokenizer
pub fn new(
    core: HashMap<String, Box<dyn Overlapper<u32, u32>>>,
    universe: Universe,
    special_tokens: SpecialTokens,
) -> Self
Create a new tokenizer
pub fn from_config<P: AsRef<Path>>(cfg_path: P) -> Result<Self, TokenizerError>
Create a new tokenizer from a config file
pub fn from_bed<P: AsRef<Path>>(bed_path: P) -> Result<Self, TokenizerError>
Create a new tokenizer from a bed file
pub fn from_auto<P: AsRef<Path>>(path: P) -> Result<Self, TokenizerError>
Create a new tokenizer from a file, automatically detecting the type
pub fn tokenize(&self, regions: &[Region]) -> Result<Vec<String>, TokenizerError>
pub fn encode(&self, regions: &[Region]) -> Result<Vec<u32>, TokenizerError>
pub fn decode(&self, ids: &[u32]) -> Result<Vec<String>, TokenizerError>
pub fn convert_token_to_id(&self, token: &str) -> Option<u32>
pub fn convert_id_to_token(&self, id: u32) -> Option<String>
pub fn get_vocab_size(&self) -> usize
pub fn get_vocab(&self) -> StdHashMap<String, u32>
pub fn get_unk_token(&self) -> String
pub fn get_pad_token(&self) -> String
pub fn get_mask_token(&self) -> String
pub fn get_cls_token(&self) -> String
pub fn get_eos_token(&self) -> String
pub fn get_bos_token(&self) -> String
pub fn get_sep_token(&self) -> String
pub fn get_unk_token_id(&self) -> u32
pub fn get_pad_token_id(&self) -> u32
pub fn get_mask_token_id(&self) -> u32
pub fn get_cls_token_id(&self) -> u32
pub fn get_eos_token_id(&self) -> u32
pub fn get_bos_token_id(&self) -> u32
pub fn get_sep_token_id(&self) -> u32
pub fn get_special_tokens_mask(&self, tokens: &[String]) -> Vec<bool>
pub fn get_special_tokens(&self) -> &SpecialTokens
pub fn get_universe(&self) -> &Universe
Auto Trait Implementations§
impl Freeze for Tokenizer
impl !RefUnwindSafe for Tokenizer
impl Send for Tokenizer
impl Sync for Tokenizer
impl Unpin for Tokenizer
impl !UnwindSafe for Tokenizer
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more