pub struct ApproximateTokEnv { /* private fields */ }
Implementations§
Source§impl ApproximateTokEnv
impl ApproximateTokEnv
pub fn new(trie: TokTrie) -> Self
pub fn single_byte() -> Self
pub fn single_byte_env() -> TokEnv
Trait Implementations§
Source§impl TokenizerEnv for ApproximateTokEnv
impl TokenizerEnv for ApproximateTokEnv
Source§fn tokenize_bytes(&self, s: &[u8]) -> Vec<TokenId> ⓘ
fn tokenize_bytes(&self, s: &[u8]) -> Vec<TokenId> ⓘ
Tokenize a given byte sequence.
It may or may not interpret <|special_tokens|> as special.
Source§fn tokenize_is_canonical(&self) -> bool
fn tokenize_is_canonical(&self) -> bool
If this returns true, this tokenizer always returns canonical tokenizations
and can be used for forcing tokens.
Non-canonical tokenizers will typically just use TokTrie::greedy_tokenize().
Source§fn tokenize_bytes_marker(&self, s: &[u8]) -> (Vec<TokenId>, usize)
fn tokenize_bytes_marker(&self, s: &[u8]) -> (Vec<TokenId>, usize)
Tokenize a given byte sequence.
It will interpret text starting with SPECIAL_TOKEN_MARKER as special tokens.
Returns the tokens, and the number of leading tokens that should never be re-tokenized
(because they were specified using the special token marker).
Source§fn tokenize(&self, s: &str) -> Vec<TokenId> ⓘ
fn tokenize(&self, s: &str) -> Vec<TokenId> ⓘ
Tokenize a string coming from user. It may or may not interpret <|special_tokens|> as special.
Auto Trait Implementations§
impl Freeze for ApproximateTokEnv
impl RefUnwindSafe for ApproximateTokEnv
impl Send for ApproximateTokEnv
impl Sync for ApproximateTokEnv
impl Unpin for ApproximateTokEnv
impl UnwindSafe for ApproximateTokEnv
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more