Trait dataflow::tokenization::Tokenizer
pub trait Tokenizer: Debug + Send + Sync {
fn load() -> Self;
fn tokenize(&self, string: &str) -> Vec<String>;
fn batch_tokenize(&self, strings: Vec<String>) -> Vec<Vec<String>>;
fn untokenize(&self, tokens: Vec<String>) -> String;
fn batch_untokenize(&self, tokens: Vec<Vec<String>>) -> Vec<String>;
}
Expand description
A trait to implement for all tokenizers; it contains the basic tokenizing and untokenizing functions.
Required methods
fn tokenize(&self, string: &str) -> Vec<String>
Tokenize a single string
fn batch_tokenize(&self, strings: Vec<String>) -> Vec<Vec<String>>
Tokenize a batch of strings
fn untokenize(&self, tokens: Vec<String>) -> String
Untokenize a single string