Struct TokTrie

Source
pub struct TokTrie { /* private fields */ }

Implementations§

Source§

impl TokTrie

Source

pub const SPECIAL_TOKEN_MARKER: u8 = 255u8

Source

pub const MAX_DBG_TOKENS: usize = 200usize

Source

pub fn from(info: &TokRxInfo, words: &[Vec<u8>]) -> Self

Source

pub fn filter(&self, filter: &SimpleVob) -> Self

Source

pub fn with_eos_token(&self, eos_token: TokenId) -> Self

Source

pub fn with_info(&self, info: TokRxInfo) -> Self

Source

pub fn build_chat_mode_trie(&self) -> Self

Source

pub fn info(&self) -> &TokRxInfo

Source

pub fn eos_token(&self) -> TokenId

Source

pub fn vocab_size(&self) -> usize

Source

pub fn alloc_token_set(&self) -> SimpleVob

Source

pub fn singleton_token_set(&self, tok: TokenId) -> SimpleVob

Source

pub fn token_set_dbg(&self, ts: &SimpleVob) -> String

Source

pub fn alloc_logits(&self) -> Vec<f32>

Source

pub fn test_trace_tokens(&self, toks: &[u32]) -> String

Source

pub fn tokens_dbg(&self, toks: &[u32]) -> String

Source

pub fn token_dbg(&self, idx: u32) -> String

Source

pub fn token_str(&self, idx: u32) -> String

Source

pub fn token_len(&self, idx: u32) -> usize

Source

pub fn token(&self, idx: u32) -> &[u8]

Source

pub fn decode(&self, tokens: &[TokenId]) -> Vec<u8>

Source

pub fn decode_ext(&self, tokens: &[TokenId], include_special: bool) -> Vec<u8>

Source

pub fn decode_as_special(&self, tok: TokenId) -> Vec<u8>

Source

pub fn decode_raw(&self, tokens: &[TokenId]) -> Vec<u8>

Source

pub fn decode_str(&self, tokens: &[TokenId]) -> String

Source

pub fn decode_raw_to_decode(&self, bytes: &[u8]) -> Vec<u8>

Source

pub fn is_special_token(&self, tok: TokenId) -> bool

Source

pub fn get_special_token(&self, name: &str) -> Option<TokenId>

Source

pub fn get_special_tokens(&self) -> Vec<TokenId>

Source

pub fn greedy_tokenize(&self, bytes: &[u8]) -> Vec<TokenId>

Source

pub fn tokenize_with_greedy_fallback( &self, bytes: &[u8], str_tokenize: impl Fn(&str) -> Vec<TokenId>, ) -> Vec<TokenId>

Source

pub fn has_extensions(&self, bytes: &[u8]) -> bool

Source

pub fn token_id(&self, bytes: &[u8]) -> Option<TokenId>

Source

pub fn prefix_token_id(&self, bytes: &[u8]) -> (TokenId, usize)

Source

pub fn max_token_len(&self) -> usize

Source

pub fn root(&self) -> &TrieNode

Source

pub fn check_against(&self, tokens: &[Vec<u8>])

Source

pub fn child_at_byte<'a>( &'a self, n: &'a TrieNode, byte: u8, ) -> Option<&'a TrieNode>

Source

pub fn all_subtokens(&self, bytes: &[u8]) -> Vec<TokenId>

Source

pub fn node_children(&self, n: &TrieNode) -> NodeChildren<'_>

Source

pub fn child_at_bytes<'a>( &'a self, n: &'a TrieNode, bytes: &[u8], ) -> Option<&'a TrieNode>

Source

pub fn token_id_at_bytes(&self, bytes: &[u8]) -> Option<TokenId>

Source

pub fn chop_tokens( &self, r: &mut impl Recognizer, tokens: &[TokenId], ) -> (usize, usize)

Return how many tokens and bytes need to be chopped off tokens, so that we do not limit all possible future tokenizations matching the recognizer.

Source

pub fn has_valid_extensions( &self, r: &mut impl Recognizer, start: &[u8], ) -> bool

Check if add_bias() would have returned any tokens.

Source

pub fn all_prefixes(&self, bytes: &[u8]) -> Vec<TokenId>

Source

pub fn add_bias( &self, r: &mut impl Recognizer, toks: &mut SimpleVob, start: &[u8], )

Source

pub fn all_tokens(&self) -> Vec<Vec<u8>>

Source

pub fn sorted_tokens(&self) -> Vec<(u32, Vec<u8>)>

Source

pub fn trie_stats(&self) -> String

Trait Implementations§

Source§

impl Clone for TokTrie

Source§

fn clone(&self) -> TokTrie

Returns a copy of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.