tiktoken-stream 0.1.0

Streaming token counter for partial LLM responses. Accumulates token count across chunks without holding the full text. Pluggable estimator function. Zero deps.
Documentation
//! # tiktoken-stream
//!
//! Streaming token counter for partial LLM responses.
//!
//! A streaming LLM response arrives as a sequence of small text chunks
//! (`stream=true` SSE deltas). For UX (progress bars, soft caps) you
//! often want a running token count without holding the full text in
//! memory and without re-tokenizing the entire prefix on every chunk.
//!
//! This crate is a tiny counter:
//!
//! 1. Construct with [`TokenStream::new`] (default estimator) or
//!    [`TokenStream::with_estimator`] (any `FnMut(&str) -> u64 + Send + 'static`).
//! 2. Call [`TokenStream::push`] with each delta. The stream forwards
//!    the chunk through your tokenizer and bumps the running total.
//! 3. Read [`TokenStream::count`] at any time.
//!
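//! The default estimator (4 chars per token, rounded up) is what
//! `char-token-est`'s `Family::Gpt` uses; swap in tiktoken via
//! [`TokenStream::with_estimator`] when accuracy matters. The estimator
//! runs on each chunk independently and the results are summed, so the
//! default's rounding is applied per chunk, not to the total.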
//!
//! ## Example
//!
//! ```
//! use tiktoken_stream::TokenStream;
//!
//! let mut s = TokenStream::new();
//! s.push("Hello, ");
//! s.push("world!");
//! assert!(s.count() >= 1);
//! ```
//!
//! ## Custom estimator
//!
//! ```
//! use tiktoken_stream::TokenStream;
//!
//! // One token per whitespace-separated word.
//! let mut s = TokenStream::with_estimator(|chunk: &str| {
//!     chunk.split_whitespace().count() as u64
//! });
//! s.push("the quick brown");
//! s.push(" fox jumps");
//! assert_eq!(s.count(), 5);
//! ```

#![deny(missing_docs)]

/// Boxed, type-erased estimator callback: takes one chunk, returns its token count.
///
/// `FnMut` allows the callback to keep state between calls; the `Send` bound
/// is required of every estimator stored behind this alias.
type Estimator = Box<dyn FnMut(&str) -> u64 + Send>;

/// Streaming token counter.
///
/// Holds a boxed estimator and two running totals. The text of pushed chunks
/// is never stored; only the totals derived from it are kept.
pub struct TokenStream {
    /// Callback invoked once per pushed chunk; its result is added to `count`.
    estimator: Estimator,
    /// Sum of the estimator's results since construction or the last reset.
    count: u64,
    /// Number of `char`s pushed since construction or the last reset.
    chars: u64,
}

impl TokenStream {
    /// Construct a stream using the default 4-chars-per-token estimator.
    ///
    /// Both running totals start at zero.
    #[must_use]
    pub fn new() -> Self {
        Self::with_estimator(default_estimator)
    }

    /// Construct a stream that runs `est` on each pushed chunk.
    ///
    /// `est` receives every chunk exactly as it was passed to
    /// [`TokenStream::push`] and returns the number of tokens to add for it.
    /// Because it is an `FnMut`, it may keep state between calls; that state
    /// is owned by the closure and is not touched by [`TokenStream::reset`].
    #[must_use]
    pub fn with_estimator<F>(est: F) -> Self
    where
        F: FnMut(&str) -> u64 + Send + 'static,
    {
        Self {
            estimator: Box::new(est),
            count: 0,
            chars: 0,
        }
    }

    /// Push the next chunk. Returns the new running count.
    ///
    /// The estimator only ever sees `chunk`, never earlier text, so the
    /// running count is the sum of the per-chunk estimates. Both totals
    /// saturate at `u64::MAX` rather than overflowing.
    pub fn push(&mut self, chunk: &str) -> u64 {
        // Run the estimator before touching either total: if it panics,
        // `count` and `chars` still describe the same set of chunks.
        let tokens = (self.estimator)(chunk);
        // `usize` -> `u64` is a widening cast on every supported target.
        let chars = chunk.chars().count() as u64;

        // Saturate: `tokens` comes from arbitrary user code, and a plain `+=`
        // would panic in debug builds and silently wrap in release builds.
        self.count = self.count.saturating_add(tokens);
        self.chars = self.chars.saturating_add(chars);
        self.count
    }

    /// Current running token count.
    #[must_use]
    pub fn count(&self) -> u64 {
        self.count
    }

    /// Total characters (`char`s, not bytes) pushed so far.
    #[must_use]
    pub fn chars(&self) -> u64 {
        self.chars
    }

    /// Reset the counter (estimator preserved).
    ///
    /// Only the two running totals are zeroed. Any state a custom estimator
    /// keeps internally is left as it is.
    pub fn reset(&mut self) {
        self.count = 0;
        self.chars = 0;
    }
}

impl Default for TokenStream {
    fn default() -> Self {
        Self::new()
    }
}

impl std::fmt::Debug for TokenStream {
    /// Formats the two running totals; the boxed estimator is left out of
    /// the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Destructure exhaustively so a newly added field has to be
        // considered here before the code compiles again.
        let Self {
            estimator: _,
            count,
            chars,
        } = self;

        let mut out = f.debug_struct("TokenStream");
        out.field("count", count);
        out.field("chars", chars);
        out.finish()
    }
}

/// Default estimator: four characters per token, rounded up.
///
/// Counts `char`s (Unicode scalar values), not bytes. An empty chunk
/// yields 0; any non-empty chunk yields at least 1.
fn default_estimator(chunk: &str) -> u64 {
    const CHARS_PER_TOKEN: u64 = 4;

    // `usize` -> `u64` is a widening cast on every supported target.
    let chars = chunk.chars().count() as u64;

    // Integer ceiling division: exact for every input, no float round-trip,
    // and no `chars + 3` style addition that could overflow.
    chars / CHARS_PER_TOKEN + u64::from(chars % CHARS_PER_TOKEN != 0)
}