pub trait Tokenizer {
    // Required methods
    fn tokenize_str(&self, doc: &str) -> Result<TokenCollection, TokenizerError>;
    fn to_string(
        &self,
        tokens: TokenCollection,
    ) -> Result<String, TokenizerError>;

    // Provided method
    fn split_text(
        &self,
        doc: &str,
        max_tokens_per_chunk: usize,
        chunk_overlap: usize,
    ) -> Result<Vec<String>, TokenizerError> { ... }
}
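The provided split_text method builds on the two required methods to chunk a document by token count. A minimal usage sketch follows; the use path is a placeholder for wherever this crate exports the trait, the helper name chunk_document and the chunk sizes are illustrative, and only the items shown in the declaration above are assumed.

use your_crate::tokens::{Tokenizer, TokenizerError}; // placeholder path; adjust to this crate's module

/// Split `doc` into chunks of at most 512 tokens, with 64 tokens of
/// overlap between consecutive chunks (illustrative values).
fn chunk_document<T: Tokenizer>(tokenizer: &T, doc: &str) -> Result<Vec<String>, TokenizerError> {
    // As the parameter names suggest, each returned chunk holds at most
    // `max_tokens_per_chunk` tokens and adjacent chunks share
    // `chunk_overlap` tokens.
    tokenizer.split_text(doc, 512, 64)
}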
Required Methods

fn tokenize_str(&self, doc: &str) -> Result<TokenCollection, TokenizerError>
Tokenizes a string.
Parameters
doc: The string to tokenize.
Returns
A Result containing the tokens as a TokenCollection, or a TokenizerError if tokenization fails.
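Because both required methods share the TokenizerError error type, tokenize_str composes cleanly with the ? operator. A small sketch, assuming only the trait as declared above (the use path is a placeholder and the helper name is illustrative):

use your_crate::tokens::{TokenCollection, Tokenizer, TokenizerError}; // placeholder path

/// Tokenize a prompt, forwarding any tokenizer failure to the caller.
fn tokenize_prompt<T: Tokenizer>(tokenizer: &T, prompt: &str) -> Result<TokenCollection, TokenizerError> {
    // `?` propagates a TokenizerError unchanged; on success we hold the
    // crate's TokenCollection, ready to pass to `to_string` below.
    let tokens = tokenizer.tokenize_str(prompt)?;
    Ok(tokens)
}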
fn to_string(&self, tokens: TokenCollection) -> Result<String, TokenizerError>
Converts a collection of tokens back into a string.
Parameters
tokens: The TokenCollection to convert.
Returns
A Result containing the reconstructed string, or a TokenizerError if conversion fails.
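A round-trip sketch combining the two required methods. Whether the decoded text exactly matches the input depends on the concrete tokenizer (normalization may be lossy), and as above the use path is a placeholder:

use your_crate::tokens::{Tokenizer, TokenizerError}; // placeholder path

/// Encode `doc` into tokens, then decode it back to text.
fn round_trip<T: Tokenizer>(tokenizer: &T, doc: &str) -> Result<String, TokenizerError> {
    let tokens = tokenizer.tokenize_str(doc)?; // &str -> TokenCollection
    tokenizer.to_string(tokens)                // TokenCollection -> String
}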