quicktok 0.1.1

Minimal, fast, multi-threaded implementation of the Byte Pair Encoding (BPE) algorithm for LLM tokenization
Documentation
1
2
3
4
5
6
7
8
9
10
11
mod tokenizer;

use pyo3::prelude::*;

pub use crate::tokenizer::{basic::BasicTokenizer, Tokenizer};

// Initializer for the `quicktok` Python extension module: registers the
// `BasicTokenizer` class on the module object handed in by PyO3.
//
// Plain `//` comments are used deliberately here: PyO3 turns `///` doc
// comments on a `#[pymodule]` function into the Python module docstring.
#[pymodule]
fn quicktok(_py: Python, module: &PyModule) -> PyResult<()> {
    // `add_class` already yields `PyResult<()>`, so its outcome is returned
    // directly as the result of module initialization.
    module.add_class::<BasicTokenizer>()
}