Skip to main content

tiktoken_stream/
lib.rs

1//! # tiktoken-stream
2//!
3//! Streaming token counter for partial LLM responses.
4//!
5//! A streaming LLM response arrives as a sequence of small text chunks
6//! (`stream=true` SSE deltas). For UX (progress bars, soft caps) you
7//! often want a running token count without holding the full text in
8//! memory and without re-tokenizing the entire prefix on every chunk.
9//!
10//! This crate is a tiny counter:
11//!
12//! 1. Construct with a tokenizer function (`fn(&str) -> u64`).
13//! 2. Call [`TokenStream::push`] with each delta. The stream forwards
14//!    the chunk through your tokenizer and bumps the running total.
15//! 3. Read [`TokenStream::count`] at any time.
16//!
17//! The default estimator (4 chars per token, ceiling) is what
18//! `char-token-est`'s `Family::Gpt` uses; swap in tiktoken via the
19//! constructor when accuracy matters.
20//!
21//! ## Example
22//!
23//! ```
24//! use tiktoken_stream::TokenStream;
25//!
26//! let mut s = TokenStream::new();
27//! s.push("Hello, ");
28//! s.push("world!");
29//! assert!(s.count() >= 1);
30//! ```
31//!
32//! ## Custom estimator
33//!
34//! ```
35//! use tiktoken_stream::TokenStream;
36//!
37//! // One token per whitespace-separated word.
38//! let mut s = TokenStream::with_estimator(|chunk: &str| {
39//!     chunk.split_whitespace().count() as u64
40//! });
41//! s.push("the quick brown");
42//! s.push(" fox jumps");
43//! assert_eq!(s.count(), 5);
44//! ```
45
46#![deny(missing_docs)]
47
/// Boxed per-chunk estimator: receives one chunk and returns the number of
/// tokens to add for it.
///
/// `FnMut` lets the callback keep mutable state between calls; the `Send`
/// bound is what allows a [`TokenStream`] to be moved to another thread.
type Estimator = Box<dyn FnMut(&str) -> u64 + Send>;

/// Streaming token counter.
///
/// Holds a per-chunk estimator plus two running totals (estimated tokens and
/// characters). The pushed text itself is never stored.
pub struct TokenStream {
    /// Callback run on every pushed chunk; its result is added to `count`.
    estimator: Estimator,
    /// Sum of the estimator's results since construction or the last reset.
    count: u64,
    /// Number of `char`s (Unicode scalar values) pushed since construction or
    /// the last reset.
    chars: u64,
}
56
57impl TokenStream {
58    /// Construct a stream using the default 4-chars-per-token estimator.
59    pub fn new() -> Self {
60        Self::with_estimator(default_estimator)
61    }
62
63    /// Construct a stream that runs `est` on each pushed chunk.
64    pub fn with_estimator<F>(est: F) -> Self
65    where
66        F: FnMut(&str) -> u64 + Send + 'static,
67    {
68        Self {
69            estimator: Box::new(est),
70            count: 0,
71            chars: 0,
72        }
73    }
74
75    /// Push the next chunk. Returns the new running count.
76    pub fn push(&mut self, chunk: &str) -> u64 {
77        self.chars += chunk.chars().count() as u64;
78        self.count += (self.estimator)(chunk);
79        self.count
80    }
81
82    /// Current running token count.
83    pub fn count(&self) -> u64 {
84        self.count
85    }
86
87    /// Total characters pushed so far.
88    pub fn chars(&self) -> u64 {
89        self.chars
90    }
91
92    /// Reset the counter (estimator preserved).
93    pub fn reset(&mut self) {
94        self.count = 0;
95        self.chars = 0;
96    }
97}
98
99impl Default for TokenStream {
100    fn default() -> Self {
101        Self::new()
102    }
103}
104
105impl std::fmt::Debug for TokenStream {
106    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107        f.debug_struct("TokenStream")
108            .field("count", &self.count)
109            .field("chars", &self.chars)
110            .finish()
111    }
112}
113
/// 4-chars-per-token, ceil.
///
/// Counts `char`s (Unicode scalar values) in `chunk`, not bytes, and returns
/// that count divided by four, rounded up. An empty chunk yields 0.
fn default_estimator(chunk: &str) -> u64 {
    const CHARS_PER_TOKEN: u64 = 4;

    let chars = chunk.chars().count() as u64;
    // Integer ceiling division: exact for every length (no f64 round-trip) and
    // naturally 0 for an empty chunk, so no special case is needed.
    chars / CHARS_PER_TOKEN + u64::from(chars % CHARS_PER_TOKEN != 0)
}