ai_memory/
sizes.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! v0.6.4-005 — Static schema-size table.
5//!
6//! Computes the per-tool BPE token cost of every MCP tool registered by
7//! `crate::mcp::tool_definitions`, using the `tiktoken-rs` `cl100k_base`
8//! tokenizer (the same BPE Claude/GPT use for context-window accounting,
9//! and the same one v0.6.3.1 P6/R1 already wires for `budget_tokens`).
10//!
11//! The table is computed lazily on first access and cached behind a
12//! `OnceLock`. The cost of the first call is one full pass over every
13//! tool schema (~7 ms on Apple M2) followed by cache hits forever after.
14//!
15//! ## Why lazy and not literally compile-time
16//!
17//! The "build-time" framing in the v0.6.4 issue spec referred to the
18//! desire that operators be able to query the table without running
19//! the full MCP `register_tools()` dance — the runtime cache satisfies
20//! that constraint. A real build-time approach would need either a
21//! proc-macro or a `build.rs` that re-parsed the JSON-emitting Rust
22//! source, both of which trade simplicity for marginal warm-cache
23//! performance that nobody is paying for here. The lazy approach also
24//! keeps the BPE table out of `cargo bench` cold paths — every place
25//! that *doesn't* run `doctor --tokens` pays exactly nothing.
26//!
27//! ## CI gate
28//!
29//! `tool_sizes_under_ci_gate()` returns the largest single tool cost.
30//! The unit test `no_tool_exceeds_1500_tokens` enforces the v0.6.4-005
31//! acceptance gate that no individual tool definition exceeds 1500
32//! tokens. The number is high enough to permit growth on the more
33//! schema-heavy KG/governance tools and low enough that doubling a
34//! tool's schema by accident lands in CI red.
35
36use std::sync::OnceLock;
37
38use serde_json::Value;
39use tiktoken_rs::CoreBPE;
40
/// Single-tool cost report. The `total` is what counts against the
/// per-request prefix; the `name_tokens` and `schema_tokens` split is
/// useful for the doctor's diagnostic output.
///
/// Values are filled in by `size_one_tool`, one report per tool
/// definition object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolSize {
    /// The tool's `name` string, copied out of its definition object.
    pub name: String,
    /// Token count of the compact JSON serialization of the *entire*
    /// tool object (so the name is already included in this figure).
    pub schema_tokens: usize,
    /// Token count of the bare name string on its own. Informational
    /// only — it is not added on top of `schema_tokens`.
    pub name_tokens: usize,
    /// Cost charged for the tool. `size_one_tool` sets this equal to
    /// `schema_tokens`.
    pub total_tokens: usize,
}
51
/// pm-v3.1 PR8 (issue #1174) — shared one-shot cache for the verbose
/// table. Replaces the prior pair of function-local `static TABLE`
/// declarations (one in `tool_sizes`, one in `trimmed_tool_sizes`)
/// with a single module-level slot per shape. The cache is computed
/// lazily on first use and reused forever after.
///
/// Filled by `tool_sizes` via `compute_table(false)`.
static VERBOSE_TABLE: OnceLock<Vec<ToolSize>> = OnceLock::new();
58
/// pm-v3.1 PR8 (issue #1174) — shared cache for the trimmed (wire-
/// form) table, matching `VERBOSE_TABLE` above.
///
/// Filled by `trimmed_tool_sizes` via `compute_table(true)`.
static TRIMMED_TABLE: OnceLock<Vec<ToolSize>> = OnceLock::new();
62
63/// Runtime-computed table of every tool's tokenized schema cost
64/// **at the verbose ceiling** — every optional param, every default,
65/// every per-property description. This is the upper bound a host
66/// can ever pay (only reachable via
67/// `memory_capabilities { verbose=true, family=…, include_schema=true }`
68/// since v0.7 C4).
69///
70/// Returns a static slice on every call after the first invocation
71/// (which performs the one-time BPE pass).
72pub fn tool_sizes() -> &'static [ToolSize] {
73    VERBOSE_TABLE
74        .get_or_init(|| compute_table(false))
75        .as_slice()
76}
77
78/// v0.7 C4 + #859 — runtime-computed table of every tool's tokenized
79/// schema cost **as actually shipped on `tools/list`**. Per-property
80/// `description` prose is stripped, the top-level tool `description`
81/// is compacted to the first sentence, the `docs` field is dropped,
82/// but every property entry survives so MCP clients can discover the
83/// call surface (per [`crate::mcp::trim_optional_params`] +
84/// [`crate::mcp::strip_docs_from_tools`] + `wire_compact_descriptions`).
85/// This is what an MCP host pays per request on the default code path.
86///
87/// **Wire-form invariant.** This table is computed by feeding the
88/// output of [`crate::mcp::tool_definitions_for_profile`] (full
89/// profile) into the cl100k_base tokenizer; the budget gate at
90/// `tests/c2_tool_docs_field.rs::c2_tools_list_token_budget_is_under_post_859_ceiling`
91/// pins the sum at ≤ 5000 cl100k tokens (post-#859 floor; was 3500
92/// pre-#859 when the trim hid optional property keys entirely).
93pub fn trimmed_tool_sizes() -> &'static [ToolSize] {
94    TRIMMED_TABLE.get_or_init(|| compute_table(true)).as_slice()
95}
96
97/// Highest-cost tool in the verbose table. Used by the CI gate.
98pub fn tool_sizes_under_ci_gate() -> usize {
99    tool_sizes()
100        .iter()
101        .map(|t| t.total_tokens)
102        .max()
103        .unwrap_or(0)
104}
105
106/// Sum of every tool's `total_tokens` (verbose schema) — the
107/// worst-case prefix cost on a `verbose=true` opt-in harness with
108/// `--profile full`. The actually-paid cost on the default code path
109/// is reported by [`trimmed_full_profile_total_tokens`].
110pub fn full_profile_total_tokens() -> usize {
111    tool_sizes().iter().map(|t| t.total_tokens).sum()
112}
113
114/// v0.7 C4 — sum of every tool's `total_tokens` after the C4 trim
115/// (optionals hidden). This is the bare `tools/list` payload cost
116/// under `--profile full`.
117pub fn trimmed_full_profile_total_tokens() -> usize {
118    trimmed_tool_sizes().iter().map(|t| t.total_tokens).sum()
119}
120
121/// Lookup a single tool by name in the verbose table. `O(n)` but
122/// `n ≤ 57` (v0.7.0 L1-5 added 5 skill tools).
123pub fn tool_size(name: &str) -> Option<&'static ToolSize> {
124    tool_sizes().iter().find(|t| t.name == name)
125}
126
127fn compute_table(trimmed: bool) -> Vec<ToolSize> {
128    let bpe = bpe();
129    // #859 — to keep the budget model in lockstep with the actually-
130    // shipped wire payload, delegate to `tool_definitions_for_profile`
131    // for the trimmed case (which now performs the full wire shape:
132    // properties preserved, per-property prose stripped, top-level
133    // description compacted). For the verbose case we measure the raw
134    // `tool_definitions()` table as it would appear on the
135    // `memory_capabilities { verbose=true }` opt-in path.
136    let defs = if trimmed {
137        crate::mcp::tool_definitions_for_profile(&crate::profile::Profile::full())
138    } else {
139        crate::mcp::tool_definitions()
140    };
141    let tools = defs
142        .get("tools")
143        .and_then(Value::as_array)
144        .cloned()
145        .unwrap_or_default();
146
147    tools
148        .into_iter()
149        .filter_map(|tool| size_one_tool(&bpe, &tool))
150        .collect()
151}
152
153fn size_one_tool(bpe: &CoreBPE, tool: &Value) -> Option<ToolSize> {
154    let name = tool.get("name").and_then(Value::as_str)?.to_string();
155    // The cost the host pays is the serialized JSON of the entire tool
156    // object — name + description + inputSchema. We use the canonical
157    // serde_json serialization (no pretty-printing) because that is
158    // what every MCP host transmits over stdio.
159    let schema_json = serde_json::to_string(tool).ok()?;
160    let schema_tokens = bpe.encode_with_special_tokens(&schema_json).len();
161    let name_tokens = bpe.encode_with_special_tokens(&name).len();
162    Some(ToolSize {
163        name,
164        schema_tokens,
165        name_tokens,
166        total_tokens: schema_tokens,
167    })
168}
169
170fn bpe() -> CoreBPE {
171    // We construct a fresh BPE on each compute_table call (only ever
172    // called once) because `cl100k_base` returns an owned `CoreBPE`
173    // and stashing it forever in a static would leak ~1.7 MB for a
174    // table that only gets walked at startup. Cheap to throw away.
175    tiktoken_rs::cl100k_base().expect("cl100k_base BPE table embedded in tiktoken-rs")
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// CI gate per v0.6.4-005 acceptance criteria. If any tool's schema
    /// crosses 1500 tokens, the whole build fails. The number is roughly
    /// 2.5× today's largest tool (memory_store at ~620 tokens) so we have
    /// runway, but not so much runway that a 3× regression slips through.
    #[test]
    fn no_tool_exceeds_1500_tokens() {
        let max = tool_sizes_under_ci_gate();
        assert!(
            max <= 1500,
            "v0.6.4-005 CI gate: largest tool schema is {max} tokens (limit: 1500). \
             Inspect `cargo run -- doctor --tokens --raw-table` to find the offender."
        );
    }

    /// Sanity: the table must be populated with exactly the number of
    /// tools the full profile expects. Catches accidental empty
    /// `tool_definitions()` regressions that would silently hide other
    /// failures.
    #[test]
    fn table_entry_count_matches_full_profile() {
        // v0.7.0 refactor PR-2 (#793) — tool-count SSOT. Anchor the
        // assertion on `Profile::full().expected_tool_count()` (derived
        // from the per-Family `tool_names` slices) rather than a
        // hardcoded literal, so adding a new MCP tool touches ONE site
        // (the family slice) instead of N assertions across the
        // codebase.
        let n = tool_sizes().len();
        let expected = crate::profile::Profile::full().expected_tool_count();
        assert_eq!(
            n, expected,
            "tool_sizes() must hold exactly {expected} tools (the full-profile \
             SSOT count); got {n}. If these diverge, a tool was added to one \
             surface but not the other."
        );
    }

    /// Every tool should have non-zero name + schema costs. Zero would
    /// mean either an empty schema or a tokenizer wiring break.
    #[test]
    fn every_tool_has_nonzero_cost() {
        for t in tool_sizes() {
            assert!(t.schema_tokens > 0, "tool {} schema_tokens = 0", t.name);
            assert!(t.name_tokens > 0, "tool {} name_tokens = 0", t.name);
        }
    }

    /// Full-profile total cost — measured against `cl100k_base` (the
    /// tokenizer Claude / GPT actually use for input accounting).
    ///
    /// **Truthfulness note (v0.6.4-005, 2026-05-04):** the v0.6.4 RFC
    /// claimed ~25,800 tokens for the full surface, derived from "~600
    /// tokens/tool × 43" measured against MiniLM. MiniLM is a sentence-
    /// embedding vocabulary (~30K tokens) that systematically over-counts
    /// JSON by ~4× vs. `cl100k_base` (100K-token chat-completion BPE).
    /// The measured cost in `cl100k_base` at that time was ~6,000 tokens
    /// for the full surface — still material, still worth the v0.6.4
    /// ship, but the public claims need a 4× downward correction
    /// (tracked in v0.6.4-014 + v0.6.4-015 docs work).
    ///
    /// **v0.7 C2 update (2026-05-06):** the canonical
    /// `tool_definitions()` now carries an additional per-tool `docs`
    /// field (long-form description + examples) that the bare
    /// `tools/list` payload strips before transmission. The numbers
    /// in this table reflect the **source of truth** (verbose +
    /// short), not the wire payload. The bare-wire budget is pinned
    /// separately by
    /// `tests/c2_tool_docs_field.rs::c2_tools_list_token_budget_is_under_post_859_ceiling`;
    /// this note originally recorded that ceiling as ≤ 3500 tokens and
    /// the always-on payload as ~85% smaller than the source. Both
    /// figures predate #859 and #987 — the in-module wire-form gate
    /// (`trimmed_full_profile_total_under_post_859_ceiling`) now
    /// asserts ≤ 11 000 tokens and a ≥ 25% saving.
    ///
    /// The range asserted today is 5K..=17K; see the inline history
    /// below for how it got there.
    #[test]
    fn full_profile_total_in_honest_measured_range() {
        let total = full_profile_total_tokens();
        // **v0.7.0 #829 update.** Prior bound was 5K..=16K to soak the
        // multi-paragraph `docs` prose that every tool carried. After
        // the #829 trim every `docs` field is a single condensed
        // sentence with issue refs + tier annotations preserved.
        //
        // **v0.7.0 #987 update.** D1.6 collapsed `tool_definitions()`
        // to iterate over per-tool `McpTool` impls; the schemars-derived
        // `inputSchema` carries additional metadata the legacy
        // hand-coded macro didn't: `additionalProperties: false`,
        // `default: null` on optional fields, `$schema` reference,
        // `title`, request-struct-level `description`. Measured total
        // settles at ~15K. Hard ceiling at 17K to leave 2K headroom for
        // the next field-addition without re-bumping. Floor stays at 5K
        // to catch a wiring break that drops the catalog entirely.
        assert!(
            (5_000..=17_000).contains(&total),
            "full-profile total {total} tokens is outside the measured \
             cl100k_base range (5K-17K, post-#987 D1.6). If the schema \
             grew intentionally, update `tests/token_budget_guard.rs::\
             VERBOSE_FULL_PROFILE_CEILING_TOKENS` AND this bound together."
        );
    }

    /// Lookup by name should resolve a known tool, and that tool must
    /// sit within the same 1500-token limit the CI gate enforces.
    #[test]
    fn tool_size_resolves_memory_store() {
        let t = tool_size("memory_store").expect("memory_store should exist");
        assert!(t.total_tokens > 0);
        // Inclusive bound, matching `no_tool_exceeds_1500_tokens`.
        assert!(t.total_tokens <= 1500);
    }

    /// Lookup of a nonexistent tool should return None, not panic.
    #[test]
    fn tool_size_returns_none_for_unknown() {
        assert!(tool_size("memory_does_not_exist_42").is_none());
    }

    /// v0.7 C4 + #859 acceptance gate: the trimmed `tools/list`
    /// payload (the shape an MCP host actually receives by default)
    /// must be strictly smaller than the verbose ceiling, must save
    /// at least 25% of it, AND must stay under the wire-form budget
    /// asserted below (currently 11 000 tokens).
    ///
    /// **History.** Pre-#859 baseline: trimmed ≈ 3456 tokens,
    /// verbose ≈ 7416 tokens (~53% saved). The trim dropped every
    /// optional property entry from the wire, hiding the call
    /// surface from MCP clients. #859 (v0.7.0 fix) restored every
    /// property entry on the wire (keeping per-property `description`
    /// prose stripped + the top-level tool description compacted to
    /// the first sentence) so NHI agents can discover what knobs
    /// exist. Post-#859: trimmed ≈ 4500-4700 tokens, verbose ≈ 9500,
    /// with savings at ~50% (down from ~53%) because the property
    /// metadata that pre-fix lived only in the verbose catalog now
    /// also appears on the wire. At that point the ceiling was 5000,
    /// leaving ~300 tokens of headroom for future tool additions.
    ///
    /// **Post-#987 D1.6.** The ceiling rose from 5000 to 11 000
    /// because schemars-derived schemas carry extra metadata; the
    /// assertion message below lists it.
    ///
    /// The 25% lower bound on `saved_pct` keeps the trim itself
    /// honest (a regression that re-bloated the wire path with docs /
    /// per-property prose would still trip).
    #[test]
    fn trimmed_full_profile_total_under_post_859_ceiling() {
        let trimmed = trimmed_full_profile_total_tokens();
        let verbose = full_profile_total_tokens();
        assert!(
            trimmed < verbose,
            "trimmed total ({trimmed}) must be strictly smaller than verbose ({verbose})"
        );
        // Safe subtraction: `trimmed < verbose` was asserted just above.
        let saved_pct = (verbose - trimmed) as f64 / verbose as f64 * 100.0;
        assert!(
            saved_pct >= 25.0,
            "trim should save >=25% of full-profile tokens; got {saved_pct:.1}% \
             (verbose={verbose}, trimmed={trimmed}). Audit `strip_docs_from_tools` and \
             `wire_compact_descriptions` — if those broke the trim itself regressed."
        );
        assert!(
            trimmed <= 11_000,
            "post-#987 D1.6 trimmed full-profile total {trimmed} > 11000-token ceiling. \
             The #859 fix preserves every property entry on the wire. The post-D1.6 \
             ceiling rose from 5000 to 11000 because schemars-derived schemas carry \
             additional metadata (`additionalProperties: false`, `default: null`, \
             `$schema`, `title`, request-struct `description`) that the legacy \
             hand-coded `tool_definitions()` macro did not emit. If trimmed grew \
             beyond 11000, audit per-property `description` prose (must be stripped \
             by `strip_docs_from_tools`) and consider routing the new tool to \
             `family=power` instead of the always-on core."
        );
    }

    /// No tool may cost more in the trimmed table than in the verbose
    /// table, and at least one tool must actually get cheaper;
    /// otherwise the wiring is broken (e.g. `trim_optional_params` got
    /// short-circuited or the keep-list went global).
    ///
    /// A tool that shows up in the trimmed table but not in the
    /// verbose one is reported as exactly that, instead of being
    /// compared against a zero baseline and mislabelled as "grew under
    /// trim".
    #[test]
    fn trimmed_table_strictly_smaller_per_tool_where_optionals_existed() {
        let verbose: std::collections::HashMap<&str, usize> = tool_sizes()
            .iter()
            .map(|t| (t.name.as_str(), t.total_tokens))
            .collect();
        let mut at_least_one_smaller = false;
        for trimmed_tool in trimmed_tool_sizes() {
            let Some(&v) = verbose.get(trimmed_tool.name.as_str()) else {
                panic!(
                    "{} is present in the trimmed table but missing from the verbose table",
                    trimmed_tool.name
                );
            };
            assert!(
                trimmed_tool.total_tokens <= v,
                "{} grew under trim ({} > {})",
                trimmed_tool.name,
                trimmed_tool.total_tokens,
                v
            );
            if trimmed_tool.total_tokens < v {
                at_least_one_smaller = true;
            }
        }
        assert!(
            at_least_one_smaller,
            "trim should shrink at least one tool; none did — wiring is broken"
        );
    }
}
ai_memory/sizes.rs

ai_memory/
sizes.rs