Skip to main content

neuron_context/
counter.rs

1//! Token count estimation from messages and tool definitions.
2
3use neuron_types::{ContentBlock, ContentItem, Message, ToolDefinition};
4
/// Estimates token counts from text using a configurable chars-per-token ratio.
///
/// This is a heuristic estimator — real tokenization varies per model. The
/// default ratio of 4.0 chars/token approximates GPT-family and Claude models.
///
/// Text length is measured with `str::len`, i.e. in UTF-8 bytes, so non-ASCII
/// text counts each multi-byte character as several "chars".
///
/// # Example
///
/// ```
/// use neuron_context::TokenCounter;
///
/// let counter = TokenCounter::new();
/// let estimate = counter.estimate_text("Hello, world!");
/// assert!(estimate > 0);
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct TokenCounter {
    /// Average number of characters assumed to make up one token.
    chars_per_token: f32,
}
22
23impl Default for TokenCounter {
24    fn default() -> Self {
25        Self::new()
26    }
27}
28
29impl TokenCounter {
30    /// Creates a new `TokenCounter` with the default ratio of 4.0 chars/token.
31    #[must_use]
32    pub fn new() -> Self {
33        Self { chars_per_token: 4.0 }
34    }
35
36    /// Creates a new `TokenCounter` with a custom chars-per-token ratio.
37    #[must_use]
38    pub fn with_ratio(chars_per_token: f32) -> Self {
39        Self { chars_per_token }
40    }
41
42    /// Estimates the number of tokens in a text string.
43    #[must_use]
44    pub fn estimate_text(&self, text: &str) -> usize {
45        (text.len() as f32 / self.chars_per_token).ceil() as usize
46    }
47
48    /// Estimates the total token count for a slice of messages.
49    ///
50    /// Iterates all content blocks and sums their estimated token counts.
51    #[must_use]
52    pub fn estimate_messages(&self, messages: &[Message]) -> usize {
53        messages.iter().map(|m| self.estimate_message(m)).sum()
54    }
55
56    /// Estimates the total token count for a slice of tool definitions.
57    #[must_use]
58    pub fn estimate_tools(&self, tools: &[ToolDefinition]) -> usize {
59        tools
60            .iter()
61            .map(|t| {
62                let name_tokens = self.estimate_text(&t.name);
63                let desc_tokens = self.estimate_text(&t.description);
64                let schema_str = t.input_schema.to_string();
65                let schema_tokens = self.estimate_text(&schema_str);
66                name_tokens + desc_tokens + schema_tokens
67            })
68            .sum()
69    }
70
71    fn estimate_message(&self, message: &Message) -> usize {
72        // Add a small overhead per message for role markers / formatting
73        let role_overhead = 4;
74        let content_tokens: usize = message
75            .content
76            .iter()
77            .map(|block| self.estimate_content_block(block))
78            .sum();
79        role_overhead + content_tokens
80    }
81
82    fn estimate_content_block(&self, block: &ContentBlock) -> usize {
83        match block {
84            ContentBlock::Text(text) => self.estimate_text(text),
85            ContentBlock::Thinking { thinking, .. } => self.estimate_text(thinking),
86            ContentBlock::RedactedThinking { data } => self.estimate_text(data),
87            ContentBlock::ToolUse { name, input, .. } => {
88                let name_tokens = self.estimate_text(name);
89                let input_str = input.to_string();
90                let input_tokens = self.estimate_text(&input_str);
91                name_tokens + input_tokens
92            }
93            ContentBlock::ToolResult { content, .. } => {
94                content.iter().map(|item| self.estimate_content_item(item)).sum()
95            }
96            ContentBlock::Image { .. } => {
97                // Images are expensive; use a fixed estimate
98                300
99            }
100            ContentBlock::Document { .. } => {
101                // Documents are expensive; use a fixed estimate
102                500
103            }
104            ContentBlock::Compaction { content } => self.estimate_text(content),
105        }
106    }
107
108    fn estimate_content_item(&self, item: &ContentItem) -> usize {
109        match item {
110            ContentItem::Text(text) => self.estimate_text(text),
111            ContentItem::Image { .. } => 300,
112        }
113    }
114}