phi_core/context/token.rs
1use crate::types::*;
2use std::sync::Arc;
3
4// ---------------------------------------------------------------------------
5// TokenCounter trait (REQ-162)
6// ---------------------------------------------------------------------------
7
8/// Pluggable token counting strategy.
9///
10/// The default implementation ([`HeuristicTokenCounter`]) uses a ~4 chars/token
11/// heuristic — fast and sufficient for context budgeting. Provide a custom
12/// implementation for model-specific tokenizers (e.g., tiktoken for OpenAI models,
13/// or Anthropic's native token-counting API).
14///
15/// Only `estimate_text` needs to be overridden. The higher-level methods
16/// (`estimate_content`, `estimate_message`, `estimate_messages`) have default
17/// implementations that delegate to `estimate_text`.
18///
19/// # Example
20///
21/// ```
22/// use phi_core::context::token::{TokenCounter, HeuristicTokenCounter};
23///
24/// let counter = HeuristicTokenCounter;
25/// assert_eq!(counter.estimate_text("hello"), 2); // 5 chars / 4 = 2 (rounded up)
26/// ```
27pub trait TokenCounter: Send + Sync {
28 /// Estimate tokens for a raw text string.
29 fn estimate_text(&self, text: &str) -> usize;
30
31 /// Estimate tokens for a slice of Content blocks.
32 fn estimate_content(&self, content: &[Content]) -> usize {
33 content
34 .iter()
35 .map(|c| match c {
36 Content::Text { text } => self.estimate_text(text),
37 Content::Image { data, .. } => {
38 let raw_bytes = data.len() * 3 / 4;
39 (raw_bytes / 750).clamp(85, 16_000)
40 }
41 Content::Thinking { thinking, .. } => self.estimate_text(thinking),
42 Content::ToolCall {
43 name, arguments, ..
44 } => self.estimate_text(name) + self.estimate_text(&arguments.to_string()) + 8,
45 })
46 .sum()
47 }
48
49 /// Estimate tokens for a single message.
50 fn estimate_message(&self, msg: &AgentMessage) -> usize {
51 match msg {
52 AgentMessage::Llm(lm) => match &lm.message {
53 Message::User { content, .. } => self.estimate_content(content) + 4,
54 Message::Assistant { content, .. } => self.estimate_content(content) + 4,
55 Message::ToolResult {
56 content, tool_name, ..
57 } => self.estimate_content(content) + self.estimate_text(tool_name) + 8,
58 },
59 AgentMessage::Extension(ext) => self.estimate_text(&ext.data.to_string()) + 4,
60 }
61 }
62
63 /// Estimate total tokens for a message list.
64 fn estimate_messages(&self, msgs: &[AgentMessage]) -> usize {
65 msgs.iter().map(|m| self.estimate_message(m)).sum()
66 }
67}
68
69// ---------------------------------------------------------------------------
70// HeuristicTokenCounter (default)
71// ---------------------------------------------------------------------------
72
/// Default token counter: roughly one token per four bytes of UTF-8 text.
///
/// Its `TokenCounter` implementation divides the text's byte length by four
/// and rounds up, so multi-byte characters weigh more than ASCII ones. That is
/// good enough for context budgeting and compaction threshold decisions; use a
/// model-specific tokenizer (e.g., tiktoken) when you need precision.
///
/// The type is a stateless unit struct, so it derives `Debug`, `Clone`, `Copy`
/// and `Default` to be loggable, freely duplicable and constructible through
/// `Default::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct HeuristicTokenCounter;
78
79impl TokenCounter for HeuristicTokenCounter {
80 fn estimate_text(&self, text: &str) -> usize {
81 text.len().div_ceil(4)
82 }
83}
84
85// ---------------------------------------------------------------------------
86// Free functions (backward-compatible wrappers)
87// ---------------------------------------------------------------------------
88
89/// Rough token estimate: ~4 chars per token for English text.
90/// See [`TokenCounter`] for pluggable alternatives.
91pub fn estimate_tokens(text: &str) -> usize {
92 HeuristicTokenCounter.estimate_text(text)
93}
94
95/// Estimate tokens for a single message.
96pub fn message_tokens(msg: &AgentMessage) -> usize {
97 HeuristicTokenCounter.estimate_message(msg)
98}
99
100/// Estimate tokens for a Content slice.
101pub fn content_tokens(content: &[Content]) -> usize {
102 HeuristicTokenCounter.estimate_content(content)
103}
104
105/// Estimate total tokens for a message list.
106pub fn total_tokens(messages: &[AgentMessage]) -> usize {
107 HeuristicTokenCounter.estimate_messages(messages)
108}
109
110// ---------------------------------------------------------------------------
111// Helper: resolve counter from optional
112// ---------------------------------------------------------------------------
113
114/// Returns the provided counter, or `HeuristicTokenCounter` if `None`.
115pub fn resolve_counter(counter: Option<&Arc<dyn TokenCounter>>) -> &dyn TokenCounter {
116 counter
117 .map(|c| c.as_ref() as &dyn TokenCounter)
118 .unwrap_or(&HeuristicTokenCounter)
119}