ai_memory/
sizes.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! v0.6.4-005 — Static schema-size table.
5//!
//! Computes the per-tool BPE token cost of every MCP tool registered by
//! `crate::mcp::tool_definitions`, using the `tiktoken-rs` `cl100k_base`
//! tokenizer (the OpenAI GPT-3.5/GPT-4 BPE, used here as the common proxy
//! for context-window accounting — Claude's own tokenizer differs, so the
//! counts are approximate for Claude hosts — and the same one v0.6.3.1
//! P6/R1 already wires for `budget_tokens`).
10//!
11//! The table is computed lazily on first access and cached behind a
12//! `OnceLock`. The cost of the first call is one full pass over every
13//! tool schema (~7 ms on Apple M2) followed by cache hits forever after.
14//!
15//! ## Why lazy and not literally compile-time
16//!
17//! The "build-time" framing in the v0.6.4 issue spec referred to the
18//! desire that operators be able to query the table without running
19//! the full MCP `register_tools()` dance — the runtime cache satisfies
20//! that constraint. A real build-time approach would need either a
21//! proc-macro or a `build.rs` that re-parsed the JSON-emitting Rust
22//! source, both of which trade simplicity for marginal warm-cache
23//! performance that nobody is paying for here. The lazy approach also
24//! keeps the BPE table out of `cargo bench` cold paths — every place
25//! that *doesn't* run `doctor --tokens` pays exactly nothing.
26//!
27//! ## CI gate
28//!
29//! `tool_sizes_under_ci_gate()` returns the largest single tool cost.
30//! The unit test `no_tool_exceeds_1500_tokens` enforces the v0.6.4-005
31//! acceptance gate that no individual tool definition exceeds 1500
32//! tokens. The number is high enough to permit growth on the more
33//! schema-heavy KG/governance tools and low enough that doubling a
34//! tool's schema by accident lands in CI red.
35
36use std::sync::OnceLock;
37
38use serde_json::Value;
39use tiktoken_rs::CoreBPE;
40
/// Token-cost report for one MCP tool definition.
///
/// `total_tokens` is the figure that counts against the per-request
/// prefix; `schema_tokens` and `name_tokens` are kept separately so the
/// doctor's diagnostic output can show the breakdown.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ToolSize {
    /// Tool name, taken from the `"name"` field of the tool definition.
    pub name: String,
    /// Token count of the serialized tool object (the name is part of
    /// that object, so it is already included here).
    pub schema_tokens: usize,
    /// Token count of the tool name tokenized on its own.
    pub name_tokens: usize,
    /// Cost charged for the tool; populated with the same value as
    /// `schema_tokens`.
    pub total_tokens: usize,
}
51
52/// Runtime-computed table of every tool's tokenized schema cost.
53///
54/// Returns a static slice on every call after the first invocation
55/// (which performs the one-time BPE pass).
56pub fn tool_sizes() -> &'static [ToolSize] {
57    static TABLE: OnceLock<Vec<ToolSize>> = OnceLock::new();
58    TABLE.get_or_init(compute_table).as_slice()
59}
60
61/// Highest-cost tool in the table. Used by the CI gate.
62pub fn tool_sizes_under_ci_gate() -> usize {
63    tool_sizes()
64        .iter()
65        .map(|t| t.total_tokens)
66        .max()
67        .unwrap_or(0)
68}
69
70/// Sum of every tool's `total_tokens` — the worst-case prefix cost on
71/// an eager-loading harness with `--profile full`.
72pub fn full_profile_total_tokens() -> usize {
73    tool_sizes().iter().map(|t| t.total_tokens).sum()
74}
75
76/// Lookup a single tool by name. `O(n)` but `n ≤ 43`.
77pub fn tool_size(name: &str) -> Option<&'static ToolSize> {
78    tool_sizes().iter().find(|t| t.name == name)
79}
80
81fn compute_table() -> Vec<ToolSize> {
82    let bpe = bpe();
83    let defs = crate::mcp::tool_definitions();
84    let tools = defs
85        .get("tools")
86        .and_then(Value::as_array)
87        .cloned()
88        .unwrap_or_default();
89
90    tools
91        .into_iter()
92        .filter_map(|tool| size_one_tool(&bpe, &tool))
93        .collect()
94}
95
96fn size_one_tool(bpe: &CoreBPE, tool: &Value) -> Option<ToolSize> {
97    let name = tool.get("name").and_then(Value::as_str)?.to_string();
98    // The cost the host pays is the serialized JSON of the entire tool
99    // object — name + description + inputSchema. We use the canonical
100    // serde_json serialization (no pretty-printing) because that is
101    // what every MCP host transmits over stdio.
102    let schema_json = serde_json::to_string(tool).ok()?;
103    let schema_tokens = bpe.encode_with_special_tokens(&schema_json).len();
104    let name_tokens = bpe.encode_with_special_tokens(&name).len();
105    Some(ToolSize {
106        name,
107        schema_tokens,
108        name_tokens,
109        total_tokens: schema_tokens,
110    })
111}
112
113fn bpe() -> CoreBPE {
114    // We construct a fresh BPE on each compute_table call (only ever
115    // called once) because `cl100k_base` returns an owned `CoreBPE`
116    // and stashing it forever in a static would leak ~1.7 MB for a
117    // table that only gets walked at startup. Cheap to throw away.
118    tiktoken_rs::cl100k_base().expect("cl100k_base BPE table embedded in tiktoken-rs")
119}
120
#[cfg(test)]
mod tests {
    use super::*;

    /// v0.6.4-005 acceptance gate: the build fails as soon as any single
    /// tool costs more than 1500 tokens. That is roughly 2.5× today's
    /// largest tool (memory_store at ~620 tokens) — enough runway for
    /// growth, but a 3× regression still trips the gate.
    #[test]
    fn no_tool_exceeds_1500_tokens() {
        let max = tool_sizes_under_ci_gate();
        assert!(
            matches!(max, 0..=1500),
            "v0.6.4-005 CI gate: largest tool schema is {max} tokens (limit: 1500). \
             Inspect `cargo run -- doctor --tokens --raw-table` to find the offender."
        );
    }

    /// Sanity check that the table is populated with the expected number
    /// of tools. Guards against an accidentally empty
    /// `tool_definitions()` silently masking every other failure.
    #[test]
    fn table_has_43_entries_matching_tool_definitions_count() {
        let n = tool_sizes().iter().count();
        assert_eq!(
            n, 43,
            "expected exactly 43 tools (v0.6.3.1 baseline source-anchored at \
             src/mcp.rs::tool_definitions); got {n}. If the count changed, \
             update the v0.6.4 family map and this assertion together."
        );
    }

    /// Both the name and the schema must tokenize to something. A zero
    /// means either an empty schema or broken tokenizer wiring.
    #[test]
    fn every_tool_has_nonzero_cost() {
        for ToolSize {
            name,
            schema_tokens,
            name_tokens,
            ..
        } in tool_sizes()
        {
            assert!(*schema_tokens > 0, "tool {} schema_tokens = 0", name);
            assert!(*name_tokens > 0, "tool {} name_tokens = 0", name);
        }
    }

    /// Pins the full-profile total to the range measured with
    /// `cl100k_base`.
    ///
    /// **Truthfulness note (v0.6.4-005, 2026-05-04):** the v0.6.4 RFC
    /// quoted ~25,800 tokens for the full surface, extrapolated from
    /// "~600 tokens/tool × 43" measured against MiniLM. MiniLM is a
    /// sentence-embedding vocabulary (~30K tokens) that over-counts JSON
    /// by ~4× relative to `cl100k_base` (a 100K-token chat-completion
    /// BPE). Measured in `cl100k_base`, the full surface costs ~6,000
    /// tokens — still material and still worth the v0.6.4 ship, but the
    /// public claims need a 4× downward correction (tracked in
    /// v0.6.4-014 + v0.6.4-015 docs work).
    ///
    /// The savings *percentage* versus `core` (~700 tokens) stays at
    /// ~88%; the *absolute* saving is ~5,300 tokens per request, not
    /// ~22,000.
    #[test]
    fn full_profile_total_in_honest_measured_range() {
        let total = full_profile_total_tokens();
        assert!(
            matches!(total, 5_000..=8_000),
            "full-profile total {total} tokens is outside the measured \
             cl100k_base range (5K–8K). If the schema grew, update the \
             public claim in RFC/README/roadmap and adjust this bound."
        );
    }

    /// A known tool name must resolve to an entry with a sane cost.
    #[test]
    fn tool_size_resolves_memory_store() {
        let Some(t) = tool_size("memory_store") else {
            panic!("memory_store should exist");
        };
        assert!(t.total_tokens > 0);
        assert!(t.total_tokens < 1500);
    }

    /// An unknown tool name must come back as `None` rather than panic.
    #[test]
    fn tool_size_returns_none_for_unknown() {
        let missing = tool_size("memory_does_not_exist_42");
        assert!(missing.is_none());
    }
}
ai_memory/sizes.rs

ai_memory/
sizes.rs