Tokenizer

Struct Tokenizer 

Source
pub struct Tokenizer { /* private fields */ }

Implementations§

Source§

impl Tokenizer

Source

pub fn new( core: HashMap<String, Box<dyn Overlapper<u32, u32>>>, universe: Universe, special_tokens: SpecialTokens, ) -> Self

Create a new tokenizer

Source

pub fn from_config<P: AsRef<Path>>(cfg_path: P) -> Result<Self, TokenizerError>

Create a new tokenizer from a config file

Source

pub fn from_bed<P: AsRef<Path>>(bed_path: P) -> Result<Self, TokenizerError>

Create a new tokenizer from a BED file

Source

pub fn from_auto<P: AsRef<Path>>(path: P) -> Result<Self, TokenizerError>

Create a new tokenizer from a file, automatically detecting the file's type

Source

pub fn tokenize( &self, regions: &[Region], ) -> Result<Vec<String>, TokenizerError>

Source

pub fn encode(&self, regions: &[Region]) -> Result<Vec<u32>, TokenizerError>

Source

pub fn decode(&self, ids: &[u32]) -> Result<Vec<String>, TokenizerError>

Source

pub fn convert_token_to_id(&self, token: &str) -> Option<u32>

Source

pub fn convert_id_to_token(&self, id: u32) -> Option<String>

Source

pub fn get_vocab_size(&self) -> usize

Source

pub fn get_vocab(&self) -> StdHashMap<String, u32>

Source

pub fn get_unk_token(&self) -> String

Source

pub fn get_pad_token(&self) -> String

Source

pub fn get_mask_token(&self) -> String

Source

pub fn get_cls_token(&self) -> String

Source

pub fn get_eos_token(&self) -> String

Source

pub fn get_bos_token(&self) -> String

Source

pub fn get_sep_token(&self) -> String

Source

pub fn get_unk_token_id(&self) -> u32

Source

pub fn get_pad_token_id(&self) -> u32

Source

pub fn get_mask_token_id(&self) -> u32

Source

pub fn get_cls_token_id(&self) -> u32

Source

pub fn get_eos_token_id(&self) -> u32

Source

pub fn get_bos_token_id(&self) -> u32

Source

pub fn get_sep_token_id(&self) -> u32

Source

pub fn get_special_tokens_mask(&self, tokens: &[String]) -> Vec<bool>

Source

pub fn get_special_tokens(&self) -> &SpecialTokens

Source

pub fn get_universe(&self) -> &Universe

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the `TypeId` of `self`. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls `U::from(self)`.

That is, this conversion is whatever the implementation of `From<T> for U` chooses to do.

Source§

impl<T> Same for T

Source§

type Output = T

Should always be `Self`.
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.