tokenx_rs/lib.rs
1//! # tokenx-rs
2//!
3//! Fast token count estimation for LLMs, about 96% accurate, without a full tokenizer.
4//!
5//! This is a Rust port of [tokenx](https://github.com/johannschopplich/tokenx) by
6//! Johann Schopplich. It uses heuristic rules to estimate how many tokens a piece of
7//! text will consume when sent to an LLM, without needing any vocabulary files.
8//!
9//! ## Quick start
10//!
11//! ```
12//! use tokenx_rs::estimate_token_count;
13//!
14//! let tokens = estimate_token_count("Hello, world!");
15//! assert!(tokens > 0);
16//! ```
17//!
18//! ## When to use this
19//!
20//! - **Token budget estimation** before sending requests to an LLM API.
21//! - **Streaming display** of approximate token counts in real time.
22//! - **Pre-flight checks** to see if a prompt fits a model's context window.
23//!
24//! The result is an estimate. For exact counts, use a full BPE tokenizer such as `tiktoken-rs`.
25
// These modules are private; the items re-exported below are the ones
// reachable from the crate root.
26mod config;
27mod estimator;
28mod utils;
29
// Option and configuration types, re-exported from `config`.
30pub use config::{EstimationOptions, LanguageConfig, SplitOptions};
// Token count estimation functions, re-exported from `estimator`.
31pub use estimator::{estimate_token_count, estimate_token_count_with_options};
// Token-limit check, slice, and split functions, re-exported from `utils`.
32pub use utils::{
33 is_within_token_limit, is_within_token_limit_with_options, slice_by_tokens,
34 slice_by_tokens_with_options, split_by_tokens, split_by_tokens_with_options,
35};
35};