Trait text_splitter::ChunkSizer
pub trait ChunkSizer {
// Required method
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize;
}
Expand description
Determines the size of a given chunk.
Required Methods
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
Determine the size of a given chunk to use for validation.
Implementations on Foreign Types
impl ChunkSizer for &AlbertTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &BertTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &CtrlTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &DeBERTaTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &DeBERTaV2Tokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &FNetTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &Gpt2Tokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &M2M100Tokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &MarianTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &MBart50Tokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &NLLBTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &OpenAiGptTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &PegasusTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &ProphetNetTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &ReformerTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &RobertaTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &SentencePieceBpeTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &SentencePieceTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &T5Tokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &XLMRobertaTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &XLNetTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for &CoreBPE
Available on crate feature `tiktoken-rs` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
Returns the number of tokens in a given text after tokenization.
impl ChunkSizer for &Tokenizer
Available on crate feature `tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
Returns the number of tokens in a given text after tokenization.
Panics
Will panic if you don’t have a byte-level tokenizer and the splitter encounters text it can’t tokenize.
impl ChunkSizer for AlbertTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for BertTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for CtrlTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for DeBERTaTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for DeBERTaV2Tokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for FNetTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for Gpt2Tokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for M2M100Tokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for MarianTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for MBart50Tokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for NLLBTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for OpenAiGptTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for PegasusTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for ProphetNetTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for ReformerTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for RobertaTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for SentencePieceBpeTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for SentencePieceTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for T5Tokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for XLMRobertaTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for XLNetTokenizer
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl ChunkSizer for CoreBPE
Available on crate feature `tiktoken-rs` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
Returns the number of tokens in a given text after tokenization.
impl ChunkSizer for Tokenizer
Available on crate feature `tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
Returns the number of tokens in a given text after tokenization.
Panics
Will panic if you don’t have a byte-level tokenizer and the splitter encounters text it can’t tokenize.
impl<V> ChunkSizer for &BaseTokenizer<V>
Available on crate feature `rust-tokenizers` only.
fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize
impl<V> ChunkSizer for BaseTokenizer<V>
Available on crate feature `rust-tokenizers` only.