text_splitter

Trait ChunkSizer

Source
/// Determines the size of a given chunk.
///
/// The unit of measurement is chosen by the implementor. For example, the
/// `CoreBPE` and `Tokenizer` implementations documented alongside this trait
/// report the number of tokens produced by tokenizing the chunk.
pub trait ChunkSizer {
    // Required method
    /// Determine the size of a given chunk to use for validation.
    ///
    /// `chunk` is the text to measure; the return value is its size in
    /// whatever unit this implementor counts.
    fn size(&self, chunk: &str) -> usize;
}
Expand description

Determines the size of a given chunk.

Required Methods§

Source

fn size(&self, chunk: &str) -> usize

Determines the size of a given chunk, for use in validation.

Implementations on Foreign Types§

Source§

impl ChunkSizer for &AlbertTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &BertTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &CtrlTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &DeBERTaTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &DeBERTaV2Tokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &FNetTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &Gpt2Tokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &M2M100Tokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &MarianTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &MBart50Tokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &NLLBTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &OpenAiGptTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &PegasusTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &ProphetNetTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &ReformerTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &RobertaTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &SentencePieceBpeTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &SentencePieceTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &T5Tokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &XLMRobertaTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &XLNetTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for &CoreBPE

Available on crate feature tiktoken-rs only.
Source§

fn size(&self, chunk: &str) -> usize

Returns the number of tokens in a given text after tokenization.

Source§

impl ChunkSizer for &Tokenizer

Available on crate feature tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Returns the number of tokens in a given text after tokenization.

§Panics

Panics if the tokenizer is not byte-level and the splitter encounters text that it cannot tokenize.

Source§

impl ChunkSizer for AlbertTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for BertTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for CtrlTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for DeBERTaTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for DeBERTaV2Tokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for FNetTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for Gpt2Tokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for M2M100Tokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for MarianTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for MBart50Tokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for NLLBTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for OpenAiGptTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for PegasusTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for ProphetNetTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for ReformerTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for RobertaTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for SentencePieceBpeTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for SentencePieceTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for T5Tokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for XLMRobertaTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for XLNetTokenizer

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl ChunkSizer for CoreBPE

Available on crate feature tiktoken-rs only.
Source§

fn size(&self, chunk: &str) -> usize

Returns the number of tokens in a given text after tokenization.

Source§

impl ChunkSizer for Tokenizer

Available on crate feature tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Returns the number of tokens in a given text after tokenization.

§Panics

Panics if the tokenizer is not byte-level and the splitter encounters text that it cannot tokenize.

Source§

impl<V> ChunkSizer for &BaseTokenizer<V>
where V: Vocab + Sync + Send,

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Source§

impl<V> ChunkSizer for BaseTokenizer<V>
where V: Vocab + Sync + Send,

Available on crate feature rust-tokenizers only.
Source§

fn size(&self, chunk: &str) -> usize

Implementors§