Skip to main content

neuron_context/
counter.rs

1//! Token count estimation from messages and tool definitions.
2
3use neuron_types::{ContentBlock, ContentItem, Message, ToolDefinition};
4
/// Estimates token counts from text using a configurable chars-per-token ratio.
///
/// This is a heuristic estimator — real tokenization varies per model. The
/// default ratio of 4.0 chars/token approximates GPT-family and Claude models.
///
/// # Examples
///
/// ```
/// use neuron_context::TokenCounter;
///
/// let counter = TokenCounter::new();
/// let estimate = counter.estimate_text("Hello, world!");
/// assert!(estimate > 0);
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct TokenCounter {
    /// Divisor applied to a text's length to turn it into a token estimate.
    chars_per_token: f32,
}
22
23impl Default for TokenCounter {
24    fn default() -> Self {
25        Self::new()
26    }
27}
28
29impl TokenCounter {
30    /// Creates a new `TokenCounter` with the default ratio of 4.0 chars/token.
31    #[must_use]
32    pub fn new() -> Self {
33        Self {
34            chars_per_token: 4.0,
35        }
36    }
37
38    /// Creates a new `TokenCounter` with a custom chars-per-token ratio.
39    #[must_use]
40    pub fn with_ratio(chars_per_token: f32) -> Self {
41        Self { chars_per_token }
42    }
43
44    /// Estimates the number of tokens in a text string.
45    #[must_use]
46    pub fn estimate_text(&self, text: &str) -> usize {
47        (text.len() as f32 / self.chars_per_token).ceil() as usize
48    }
49
50    /// Estimates the total token count for a slice of messages.
51    ///
52    /// Iterates all content blocks and sums their estimated token counts.
53    #[must_use]
54    pub fn estimate_messages(&self, messages: &[Message]) -> usize {
55        messages.iter().map(|m| self.estimate_message(m)).sum()
56    }
57
58    /// Estimates the total token count for a slice of tool definitions.
59    #[must_use]
60    pub fn estimate_tools(&self, tools: &[ToolDefinition]) -> usize {
61        tools
62            .iter()
63            .map(|t| {
64                let name_tokens = self.estimate_text(&t.name);
65                let desc_tokens = self.estimate_text(&t.description);
66                let schema_str = t.input_schema.to_string();
67                let schema_tokens = self.estimate_text(&schema_str);
68                name_tokens + desc_tokens + schema_tokens
69            })
70            .sum()
71    }
72
73    fn estimate_message(&self, message: &Message) -> usize {
74        // Add a small overhead per message for role markers / formatting
75        let role_overhead = 4;
76        let content_tokens: usize = message
77            .content
78            .iter()
79            .map(|block| self.estimate_content_block(block))
80            .sum();
81        role_overhead + content_tokens
82    }
83
84    fn estimate_content_block(&self, block: &ContentBlock) -> usize {
85        match block {
86            ContentBlock::Text(text) => self.estimate_text(text),
87            ContentBlock::Thinking { thinking, .. } => self.estimate_text(thinking),
88            ContentBlock::RedactedThinking { data } => self.estimate_text(data),
89            ContentBlock::ToolUse { name, input, .. } => {
90                let name_tokens = self.estimate_text(name);
91                let input_str = input.to_string();
92                let input_tokens = self.estimate_text(&input_str);
93                name_tokens + input_tokens
94            }
95            ContentBlock::ToolResult { content, .. } => content
96                .iter()
97                .map(|item| self.estimate_content_item(item))
98                .sum(),
99            ContentBlock::Image { .. } => {
100                // Images are expensive; use a fixed estimate
101                300
102            }
103            ContentBlock::Document { .. } => {
104                // Documents are expensive; use a fixed estimate
105                500
106            }
107            ContentBlock::Compaction { content } => self.estimate_text(content),
108        }
109    }
110
111    fn estimate_content_item(&self, item: &ContentItem) -> usize {
112        match item {
113            ContentItem::Text(text) => self.estimate_text(text),
114            ContentItem::Image { .. } => 300,
115        }
116    }
117}