Trait text_splitter::ChunkSizer

source ·
pub trait ChunkSizer {
    // Required method
    fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize;
}
Expand description

Determines the size of a given chunk.

Required Methods§

source

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

Determines the size of a given chunk, to be used for validation.

Implementations on Foreign Types§

source§

impl ChunkSizer for &AlbertTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &BertTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &CtrlTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &DeBERTaTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &DeBERTaV2Tokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &FNetTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &Gpt2Tokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &M2M100Tokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &MarianTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &MBart50Tokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &NLLBTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &OpenAiGptTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &PegasusTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &ProphetNetTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &ReformerTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &RobertaTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &SentencePieceBpeTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &SentencePieceTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &T5Tokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &XLMRobertaTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &XLNetTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for &CoreBPE

Available on crate feature tiktoken-rs only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

Returns the number of tokens in a given text after tokenization.

source§

impl ChunkSizer for &Tokenizer

Available on crate feature tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

Returns the number of tokens in a given text after tokenization.

§Panics

Panics if the tokenizer is not byte-level and the splitter encounters text that it cannot tokenize.

source§

impl ChunkSizer for AlbertTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for BertTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for CtrlTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for DeBERTaTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for DeBERTaV2Tokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for FNetTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for Gpt2Tokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for M2M100Tokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for MarianTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for MBart50Tokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for NLLBTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for OpenAiGptTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for PegasusTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for ProphetNetTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for ReformerTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for RobertaTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for SentencePieceBpeTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for SentencePieceTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for T5Tokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for XLMRobertaTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for XLNetTokenizer

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl ChunkSizer for CoreBPE

Available on crate feature tiktoken-rs only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

Returns the number of tokens in a given text after tokenization.

source§

impl ChunkSizer for Tokenizer

Available on crate feature tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

Returns the number of tokens in a given text after tokenization.

§Panics

Panics if the tokenizer is not byte-level and the splitter encounters text that it cannot tokenize.

source§

impl<V> ChunkSizer for &BaseTokenizer<V>
where V: Vocab + Sync + Send,

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

source§

impl<V> ChunkSizer for BaseTokenizer<V>
where V: Vocab + Sync + Send,

Available on crate feature rust-tokenizers only.
source§

fn chunk_size(&self, chunk: &str, capacity: &ChunkCapacity) -> ChunkSize

Implementors§