tiktoken_stream/lib.rs
1//! # tiktoken-stream
2//!
3//! Streaming token counter for partial LLM responses.
4//!
5//! A streaming LLM response arrives as a sequence of small text chunks
6//! (`stream=true` SSE deltas). For UX (progress bars, soft caps) you
7//! often want a running token count without holding the full text in
8//! memory and without re-tokenizing the entire prefix on every chunk.
9//!
10//! This crate is a tiny counter:
11//!
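//! 1. Construct with [`TokenStream::new`] (default estimator), or pass any
//!    `FnMut(&str) -> u64 + Send + 'static` tokenizer to
//!    [`TokenStream::with_estimator`].
//! 2. Call [`TokenStream::push`] with each delta. The stream runs that
//!    chunk, on its own, through your tokenizer and adds the result to the
//!    running total.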
15//! 3. Read [`TokenStream::count`] at any time.
16//!
//! The default estimator (4 chars per token, rounded up for each chunk) is
//! what `char-token-est`'s `Family::Gpt` uses; swap in tiktoken via
//! [`TokenStream::with_estimator`] when accuracy matters.
20//!
21//! ## Example
22//!
23//! ```
24//! use tiktoken_stream::TokenStream;
25//!
26//! let mut s = TokenStream::new();
27//! s.push("Hello, ");
28//! s.push("world!");
29//! assert!(s.count() >= 1);
30//! ```
31//!
32//! ## Custom estimator
33//!
34//! ```
35//! use tiktoken_stream::TokenStream;
36//!
37//! // One token per whitespace-separated word.
38//! let mut s = TokenStream::with_estimator(|chunk: &str| {
39//! chunk.split_whitespace().count() as u64
40//! });
41//! s.push("the quick brown");
42//! s.push(" fox jumps");
43//! assert_eq!(s.count(), 5);
44//! ```
45
46#![deny(missing_docs)]
47
48type Estimator = Box<dyn FnMut(&str) -> u64 + Send>;
49
50/// Streaming token counter.
51pub struct TokenStream {
52 estimator: Estimator,
53 count: u64,
54 chars: u64,
55}
56
57impl TokenStream {
58 /// Construct a stream using the default 4-chars-per-token estimator.
59 pub fn new() -> Self {
60 Self::with_estimator(default_estimator)
61 }
62
63 /// Construct a stream that runs `est` on each pushed chunk.
64 pub fn with_estimator<F>(est: F) -> Self
65 where
66 F: FnMut(&str) -> u64 + Send + 'static,
67 {
68 Self {
69 estimator: Box::new(est),
70 count: 0,
71 chars: 0,
72 }
73 }
74
75 /// Push the next chunk. Returns the new running count.
76 pub fn push(&mut self, chunk: &str) -> u64 {
77 self.chars += chunk.chars().count() as u64;
78 self.count += (self.estimator)(chunk);
79 self.count
80 }
81
82 /// Current running token count.
83 pub fn count(&self) -> u64 {
84 self.count
85 }
86
87 /// Total characters pushed so far.
88 pub fn chars(&self) -> u64 {
89 self.chars
90 }
91
92 /// Reset the counter (estimator preserved).
93 pub fn reset(&mut self) {
94 self.count = 0;
95 self.chars = 0;
96 }
97}
98
99impl Default for TokenStream {
100 fn default() -> Self {
101 Self::new()
102 }
103}
104
105impl std::fmt::Debug for TokenStream {
106 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107 f.debug_struct("TokenStream")
108 .field("count", &self.count)
109 .field("chars", &self.chars)
110 .finish()
111 }
112}
113
/// Default estimator: one token per four characters, rounded up.
///
/// Characters are counted as Unicode scalar values (`str::chars`), not
/// bytes, so multi-byte text is not over-counted. An empty chunk yields 0.
fn default_estimator(chunk: &str) -> u64 {
    // Characters assumed to make up one token.
    const CHARS_PER_TOKEN: u64 = 4;

    // `usize` is at most 64 bits wide on supported targets, so this widening
    // cast cannot truncate.
    let chars = chunk.chars().count() as u64;

    // Exact integer ceiling division: whole groups of four, plus one more
    // token when a partial group is left over. Zero characters naturally
    // yields zero, so no special case is needed.
    chars / CHARS_PER_TOKEN + u64::from(chars % CHARS_PER_TOKEN != 0)
}