chonkier 0.0.2

🦛 Chonkie, now in Rust 🦀: No-nonsense, ultra-fast, ultra-light chunking library
Documentation
use rayon::prelude::*;

/// Converts between text and sequences of token ids.
///
/// Implementors supply [`encode`](Tokenizer::encode) and
/// [`decode`](Tokenizer::decode); every other method has a default built on
/// top of those two and may be overridden.
///
/// The `Sync` supertrait is required because the default batch methods borrow
/// `self` from inside rayon parallel iterators.
pub trait Tokenizer: Sync {
    /// Encodes `text` into a vector of token ids.
    fn encode(&self, text: &str) -> Vec<usize>;

    /// Encodes every string in `texts`, yielding one id vector per input.
    ///
    /// The default implementation calls [`encode`](Tokenizer::encode) on each
    /// element through a rayon parallel iterator.
    fn encode_batch(&self, texts: &[String]) -> Vec<Vec<usize>> {
        texts
            .par_iter()
            .map(|entry| self.encode(entry.as_str()))
            .collect::<Vec<_>>()
    }

    /// Decodes a slice of token ids back into a string.
    fn decode(&self, tokens: &[usize]) -> String;

    /// Decodes every id sequence in `tokens`, yielding one string per input.
    ///
    /// The default implementation calls [`decode`](Tokenizer::decode) on each
    /// element through a rayon parallel iterator.
    fn decode_batch(&self, tokens: &[Vec<usize>]) -> Vec<String> {
        tokens
            .par_iter()
            .map(|ids| self.decode(ids.as_slice()))
            .collect::<Vec<_>>()
    }

    /// Returns the number of tokens `text` encodes to.
    ///
    /// The default implementation performs a full
    /// [`encode`](Tokenizer::encode) and takes the length of the result.
    fn count_tokens(&self, text: &str) -> usize {
        let ids = self.encode(text);
        ids.len()
    }

    /// Returns the token count of every string in `texts`.
    ///
    /// The default implementation calls
    /// [`count_tokens`](Tokenizer::count_tokens) on each element through a
    /// rayon parallel iterator, so an overridden `count_tokens` is honoured.
    fn count_tokens_batch(&self, texts: &[String]) -> Vec<usize> {
        texts
            .par_iter()
            .map(|entry| self.count_tokens(entry.as_str()))
            .collect::<Vec<_>>()
    }
}