use std::sync::OnceLock;
use serde_json::Value;
use tiktoken_rs::CoreBPE;
/// Token-cost measurement for a single tool definition.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ToolSize {
    /// The tool's `name` string as found in its definition.
    pub name: String,
    /// Number of tokens in the tool's serialized JSON definition.
    pub schema_tokens: usize,
    /// Number of tokens in the tool name on its own.
    pub name_tokens: usize,
    /// Overall cost attributed to the tool. The constructor in this module
    /// sets it equal to `schema_tokens`.
    pub total_tokens: usize,
}
/// Returns the per-tool size table.
///
/// The table is built by `compute_table` on the first call and cached in a
/// function-local `OnceLock`; later calls return the same cached slice.
pub fn tool_sizes() -> &'static [ToolSize] {
    static TABLE: OnceLock<Vec<ToolSize>> = OnceLock::new();
    let table: &'static Vec<ToolSize> = TABLE.get_or_init(compute_table);
    table
}
/// Returns the largest `total_tokens` value across all tools, or `0` when the
/// table is empty.
///
/// The tests in this module compare this value against the per-tool limit.
pub fn tool_sizes_under_ci_gate() -> usize {
    tool_sizes()
        .iter()
        .fold(0usize, |largest, entry| largest.max(entry.total_tokens))
}
/// Returns the sum of `total_tokens` over every tool in the size table.
pub fn full_profile_total_tokens() -> usize {
    let per_tool_totals = tool_sizes().iter().map(|entry| entry.total_tokens);
    per_tool_totals.sum::<usize>()
}
/// Looks up the size entry whose name equals `name` exactly.
///
/// Returns the first matching entry of the cached table, or `None` when no
/// tool has that name.
pub fn tool_size(name: &str) -> Option<&'static ToolSize> {
    let table = tool_sizes();
    table.iter().find(|entry| entry.name.as_str() == name)
}
/// Builds the size table from the tool definitions.
///
/// Reads the `"tools"` array out of `crate::mcp::tool_definitions()` and sizes
/// each entry with `size_one_tool`. Entries that cannot be sized are skipped.
/// If `"tools"` is missing or is not an array, the result is an empty table.
fn compute_table() -> Vec<ToolSize> {
    let bpe = bpe();
    let defs = crate::mcp::tool_definitions();
    let Some(tools) = defs.get("tools").and_then(Value::as_array) else {
        return Vec::new();
    };
    // Iterate by reference: sizing only reads each definition, so there is no
    // need to deep-clone the whole JSON array first.
    tools
        .iter()
        .filter_map(|tool| size_one_tool(&bpe, tool))
        .collect::<Vec<_>>()
}
/// Measures one tool definition.
///
/// Returns `None` when the definition has no string `"name"` field or when it
/// cannot be serialized to JSON.
fn size_one_tool(bpe: &CoreBPE, tool: &Value) -> Option<ToolSize> {
    let name = tool.get("name")?.as_str()?.to_owned();
    let serialized = serde_json::to_string(tool).ok()?;

    let count_tokens = |text: &str| bpe.encode_with_special_tokens(text).len();
    let schema_tokens = count_tokens(&serialized);
    let name_tokens = count_tokens(&name);

    // The whole tool object (which carries the name) is what gets serialized,
    // so the total is taken to be the schema count; the name count is kept
    // only as a separate figure.
    let total_tokens = schema_tokens;

    Some(ToolSize {
        name,
        schema_tokens,
        name_tokens,
        total_tokens,
    })
}
/// Loads the `cl100k_base` encoder from `tiktoken_rs`.
///
/// # Panics
///
/// Panics if `tiktoken_rs::cl100k_base()` returns an error.
fn bpe() -> CoreBPE {
    let loaded = tiktoken_rs::cl100k_base();
    loaded.expect("cl100k_base BPE table embedded in tiktoken-rs")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// CI gate: the single largest tool must stay within 1500 tokens.
    #[test]
    fn no_tool_exceeds_1500_tokens() {
        let max = tool_sizes_under_ci_gate();
        assert!(
            max <= 1500,
            "v0.6.4-005 CI gate: largest tool schema is {max} tokens (limit: 1500). \
             Inspect `cargo run -- doctor --tokens --raw-table` to find the offender."
        );
    }

    /// The table must contain exactly the expected number of tools.
    #[test]
    fn table_has_43_entries_matching_tool_definitions_count() {
        let n = tool_sizes().len();
        assert_eq!(
            n, 43,
            "expected exactly 43 tools (v0.6.3.1 baseline source-anchored at \
             src/mcp.rs::tool_definitions); got {n}. If the count changed, \
             update the v0.6.4 family map and this assertion together."
        );
    }

    /// Every entry must report a positive schema and name token count.
    #[test]
    fn every_tool_has_nonzero_cost() {
        for entry in tool_sizes().iter() {
            assert!(
                entry.schema_tokens > 0,
                "tool {} schema_tokens = 0",
                entry.name
            );
            assert!(entry.name_tokens > 0, "tool {} name_tokens = 0", entry.name);
        }
    }

    /// The summed cost of all tools must fall inside the 5,000..=8,000 band.
    #[test]
    fn full_profile_total_in_honest_measured_range() {
        let total = full_profile_total_tokens();
        assert!(
            matches!(total, 5_000..=8_000),
            "full-profile total {total} tokens is outside the measured \
             cl100k_base range (5K–8K). If the schema grew, update the \
             public claim in RFC/README/roadmap and adjust this bound."
        );
    }

    /// Lookup by name finds `memory_store` and its cost is within bounds.
    #[test]
    fn tool_size_resolves_memory_store() {
        let Some(t) = tool_size("memory_store") else {
            panic!("memory_store should exist");
        };
        assert!(t.total_tokens > 0);
        assert!(t.total_tokens < 1500);
    }

    /// Lookup of a name that is not in the table yields `None`.
    #[test]
    fn tool_size_returns_none_for_unknown() {
        assert!(tool_size("memory_does_not_exist_42").is_none());
    }
}