sapient_tokenizers/lib.rs
1//! `sapient-tokenizers` — HuggingFace-compatible tokenization.
2//!
3//! Wraps the official HuggingFace `tokenizers` Rust crate, which supports:
4//! - BPE (GPT-2, RoBERTa, Llama, Falcon, Phi, Qwen)
5//! - WordPiece (BERT, DistilBERT)
6//! - SentencePiece (T5, Gemma, Llama)
7//!
8//! Also provides Jinja2 chat template rendering for chat models.
9
10pub mod chat;
11pub mod tokenizer;
12
13pub use chat::{ChatMessage, ChatRole, ChatTemplate};
14pub use tokenizer::{SapientTokenizer, TokenizerOptions};