// ironcontext_core/optimizer.rs

//! Token pruning for tool descriptions.
//!
//! Two layers:
//!
//! 1. `HeuristicOptimizer` — deterministic, offline, no model dependency.
//!    Targets the *known* failure modes of human-written MCP descriptions:
//!    politeness filler, self-reference ("This tool is a tool that…"),
//!    duplicate sentences, Markdown emphasis, whitespace runs.
//!
//! 2. `DescriptionOptimizer` trait — drop-in slot for an LLM-driven rewriter
//!    (e.g. Claude, GPT) implemented outside this crate so the core binary
//!    stays pure-CPU and offline.
//!
//! A Jaccard guardrail prevents the heuristic pass from losing meaning.
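//!
//! A minimal usage sketch (`tool` here is any `crate::manifest::Tool`; the
//! printed fields come from [`OptimizationOutcome`]):
//!
//! ```ignore
//! let opt = HeuristicOptimizer::new();
//! let out = opt.rewrite(&tool);
//! println!(
//!     "{}: {} -> {} tokens ({:.1}% cut, similarity {:.2})",
//!     out.tool, out.original_tokens, out.rewritten_tokens,
//!     out.reduction_pct, out.semantic_similarity
//! );
//! ```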

use std::collections::HashSet;
use std::sync::OnceLock;

use regex::Regex;
use serde::{Deserialize, Serialize};

use crate::manifest::Tool;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationOutcome {
    pub tool: String,
    pub original_tokens: usize,
    pub rewritten_tokens: usize,
    pub reduction_pct: f32,
    /// TF-cosine similarity over content stems between the original and
    /// rewritten descriptions, ignoring stopwords and filler. This is what
    /// we mean by "semantic similarity" — the goal is `≥ 0.95` for the
    /// pipeline. (The stricter set-based Jaccard check is applied internally
    /// by `HeuristicOptimizer::rewrite` as a guardrail, not reported here.)
    pub semantic_similarity: f32,
    pub original: String,
    pub rewritten: String,
    pub applied_rules: Vec<String>,
}

/// Pluggable optimizer interface. Out-of-tree backends (Claude/GPT/…) implement this.
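///
/// A sketch of what an out-of-tree backend could look like (the
/// `LlmOptimizer` type and its internals are hypothetical, not provided by
/// this crate):
///
/// ```ignore
/// struct LlmOptimizer { /* model client, prompt template, … */ }
///
/// impl DescriptionOptimizer for LlmOptimizer {
///     fn rewrite(&self, tool: &Tool) -> OptimizationOutcome {
///         // Ask the model for a terser description, then fill in the same
///         // token/similarity bookkeeping the heuristic pass performs.
///         todo!("call the model and assemble an OptimizationOutcome")
///     }
/// }
/// ```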
pub trait DescriptionOptimizer {
    fn rewrite(&self, tool: &Tool) -> OptimizationOutcome;
}

/// Default offline pruner.
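///
/// Construction sketch — the floor is tunable per call site (`0.9` below is
/// an illustrative value, not a tuned one):
///
/// ```ignore
/// let default = HeuristicOptimizer::new();                      // floor 0.7
/// let cautious = HeuristicOptimizer { min_jaccard: Some(0.9) }; // skips more rules
/// ```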
#[derive(Debug, Default)]
pub struct HeuristicOptimizer {
    /// Minimum *content-stem* Jaccard similarity to accept a rewrite stage
    /// (0..=1). Stopwords are excluded from the comparison, and since the
    /// rules only ever delete text, a floor of 0.7 guarantees at least ~70%
    /// of the meaningful stems survive. Defaults to 0.7 for the bloated
    /// real-world descriptions Sentinel was built to flatten.
    pub min_jaccard: Option<f32>,
}

impl HeuristicOptimizer {
    pub fn new() -> Self {
        Self::default()
    }
}

impl DescriptionOptimizer for HeuristicOptimizer {
    fn rewrite(&self, tool: &Tool) -> OptimizationOutcome {
        let original = tool.description.clone();
        let original_tokens = token_count(&original);
        let mut applied: Vec<String> = Vec::new();

        // Run rewrites from most-conservative to most-aggressive. After each,
        // we re-check Jaccard against the *original*; if a candidate falls
        // below the floor we skip that rule rather than apply it. The floor
        // compares *content stems* only (stopwords excluded), so it really
        // does measure semantic drift.
        let floor = self.min_jaccard.unwrap_or(0.7);
        let orig_set = stem_set(&original);

        let stages: Vec<(&'static str, fn(&str) -> String)> = vec![
            ("squash_whitespace", rule_squash_whitespace),
            ("strip_markdown_emphasis", rule_strip_markdown_emphasis),
            ("strip_politeness", rule_strip_politeness),
            ("collapse_self_reference", rule_collapse_self_reference),
            ("drop_use_when_clauses", rule_drop_use_when),
            ("drop_generic_filler", rule_drop_generic_filler),
            ("dedupe_sentences", rule_dedupe_sentences),
        ];

        let mut current = original.clone();
        for (name, f) in stages {
            let candidate = f(&current);
            let cand_set = stem_set(&candidate);
            let j = jaccard(&orig_set, &cand_set);
            if j >= floor {
                if candidate != current {
                    applied.push(name.to_string());
                    current = candidate;
                }
            } // else: skip this rule, it dropped too much meaning
        }

        let rewritten_tokens = token_count(&current);
        let reduction_pct = if original_tokens == 0 {
            0.0
        } else {
            (original_tokens as f32 - rewritten_tokens as f32) / original_tokens as f32 * 100.0
        };
        // Reported similarity is TF-cosine over content stems. Cosine is the
        // right metric here: dropping a single low-frequency token barely
        // moves the score, while dropping the entire high-frequency subject
        // (the noun the description is *about*) tanks it. The internal
        // Jaccard above remains the *guardrail* — a strict set-based test
        // that mechanically blocks meaning loss; cosine is what we *report*.
        let similarity = tf_cosine(&original, &current);

        OptimizationOutcome {
            tool: tool.name.clone(),
            original_tokens,
            rewritten_tokens,
            reduction_pct,
            semantic_similarity: similarity,
            original,
            rewritten: current,
            applied_rules: applied,
        }
    }
}

// ---- rules ----

fn rule_squash_whitespace(s: &str) -> String {
    static RE: OnceLock<Regex> = OnceLock::new();
    let re = RE.get_or_init(|| Regex::new(r"\s+").unwrap());
    re.replace_all(s, " ").trim().to_string()
}

fn rule_strip_markdown_emphasis(s: &str) -> String {
    static RE: OnceLock<Regex> = OnceLock::new();
    let re = RE.get_or_init(|| Regex::new(r"\*+([^*]+)\*+|_+([^_]+)_+").unwrap());
    re.replace_all(s, |c: &regex::Captures<'_>| {
        c.get(1).or_else(|| c.get(2)).map(|m| m.as_str()).unwrap_or("").to_string()
    })
    .to_string()
}

fn rule_strip_politeness(s: &str) -> String {
    static RE: OnceLock<Regex> = OnceLock::new();
    let re = RE.get_or_init(|| {
        Regex::new(
            r"(?ix)\b(?:please\s+|kindly\s+|note\s+that\s+|be\s+sure\s+to\s+|make\s+sure\s+to\s+|in\s+order\s+to\s+|simply\s+|just\s+)",
        )
        .unwrap()
    });
    re.replace_all(s, "").to_string()
}

fn rule_collapse_self_reference(s: &str) -> String {
    static RE: OnceLock<Regex> = OnceLock::new();
    let re = RE.get_or_init(|| {
        Regex::new(
            r"(?ix)\bthis\s+(?:tool|function|endpoint|api)\s+(?:is\s+(?:a|an)\s+(?:tool|function)\s+that\s+|allows\s+you\s+to\s+|can\s+be\s+used\s+to\s+|will\s+|is\s+used\s+to\s+|is\s+designed\s+to\s+)",
        )
        .unwrap()
    });
    re.replace_all(s, "").to_string()
}

fn rule_dedupe_sentences(s: &str) -> String {
    // Only act when the input has sentence-ending punctuation. Otherwise we'd
    // synthesize one (e.g. "hello world" → "hello world.") which is invasive.
    if !s.contains('.') {
        return s.to_string();
    }
    let trailing_dot = s.trim_end().ends_with('.');
    let mut seen: HashSet<String> = HashSet::new();
    let mut out: Vec<String> = Vec::new();
    for chunk in s.split('.') {
        let trimmed = chunk.trim();
        if trimmed.is_empty() {
            continue;
        }
        let mut key_vec: Vec<String> = stem_set_of_str(trimmed).into_iter().collect();
        key_vec.sort();
        let key = key_vec.join(" ");
        if seen.insert(key) {
            out.push(trimmed.to_string());
        }
    }
    if out.is_empty() {
        String::new()
    } else if trailing_dot {
        out.join(". ") + "."
    } else {
        out.join(". ")
    }
}

fn rule_drop_use_when(s: &str) -> String {
    static RE: OnceLock<Regex> = OnceLock::new();
    // Rust's regex crate doesn't support lookbehinds, so we match the
    // sentence boundary's period explicitly and re-emit it via the replacement.
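    // A hand-worked example of the pattern below (illustrative, not a spec):
    // "Fetches users. Use this tool when you need to list users. Returns
    // JSON." → "Fetches users. Returns JSON."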
    let re = RE.get_or_init(|| {
        Regex::new(
            r"(?ix)(^|\.)\s*use\s+this\s+(?:tool|function)\s+(?:when\s+you\s+(?:need|want)\s+to|to)[^.]*\.",
        )
        .unwrap()
    });
    re.replace_all(s, "$1").trim_start_matches('.').trim().to_string()
}

/// Generic filler phrases that human-written tool descriptions repeatedly
/// emit but that add no semantic constraint: adverb piles, "in the system",
/// "handles various …", "returns relevant …", "stuff like that".
fn rule_drop_generic_filler(s: &str) -> String {
    static RE: OnceLock<Regex> = OnceLock::new();
    let re = RE.get_or_init(|| {
        Regex::new(
            r"(?ix)\b(?:
                  appropriately\s*,?\s*properly\s*,?\s*and\s+correctly
                | properly\s*,?\s*and\s+correctly
                | in\s+the\s+system
                | for\s+downstream\s+use
                | (?:it\s+handles\s+various\s+\w+\s+things(?:\s+and\s+returns\s+relevant\s+(?:stuff|results))?)
                | (?:returns\s+relevant\s+(?:stuff|results))
                | stuff\s+like\s+that(?:\s*,?\s*really)?
                | this\s+or\s+that(?:\s+context)?
                | (?:simply\s+)?just\s+by\s+passing\s+the\s+id
                | the\s+resulting\s+\w+
            )\b",
        )
        .unwrap()
    });
    re.replace_all(s, "").to_string()
}

// ---- helpers ----

fn token_count(s: &str) -> usize {
    // `split_whitespace` never yields empty items, so counting is enough.
    s.split_whitespace().count()
}

fn stem_set(s: &str) -> HashSet<String> {
    stem_set_of_str(s)
}

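/// Lowercases, drops stopwords and words of ≤ 2 chars, then applies the
/// crude suffix-chopping stemmer below. Illustrative mappings, worked out by
/// hand against the suffix list: "searching" → "search", "computes" →
/// "comput", "numbers" → "number".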
fn stem_set_of_str(s: &str) -> HashSet<String> {
    s.split(|c: char| !c.is_alphanumeric())
        .filter(|w| w.len() > 2)
        .map(|w| w.to_ascii_lowercase())
        .filter(|w| !is_stopword(w))
        .map(|w| {
            // crude stemming: chop common suffixes
            for suf in ["ing", "ed", "es", "s", "ly"] {
                if w.ends_with(suf) && w.len() > suf.len() + 2 {
                    return w[..w.len() - suf.len()].to_string();
                }
            }
            w
        })
        .collect()
}

fn is_stopword(w: &str) -> bool {
    // Function words and known filler. Treat these as semantically empty for
    // the similarity comparison so a rewrite that *only* removes filler scores
    // ~1.0 against the original.
    matches!(
        w,
        // function words
        "the" | "and" | "for" | "with" | "that" | "this" | "from" | "into"
            | "you" | "your" | "yours" | "use" | "uses" | "using" | "used"
            | "are" | "was" | "were" | "will" | "would" | "could" | "should"
            | "can" | "may" | "might" | "have" | "has" | "had" | "been" | "being"
            | "its" | "their" | "them" | "they" | "our" | "out" | "any" | "all"
            | "such" | "also" | "than" | "then" | "but" | "not" | "via"
            | "onto" | "upon" | "either" | "both" | "where" | "when"
            | "need" | "needs" | "needed" | "want" | "wants" | "wanted"
            // politeness / instructional verbs
            | "please" | "kindly" | "note" | "noted" | "simply" | "just" | "sure"
            // self-reference
            | "tool" | "tools" | "function" | "endpoint" | "api"
            // generic filler we strip via rule_drop_generic_filler — must be
            // listed here so the similarity score doesn't punish their removal
            | "appropriately" | "properly" | "correctly"
            | "various" | "things" | "thing" | "stuff" | "relevant"
            | "system" | "downstream" | "context" | "back"
            | "passing" | "pass" | "passed"
            | "resulting" | "result" | "results" | "returned"
            // generic schema verbs that bloated descriptions repeat
            | "allows" | "allow" | "allowed" | "designed"
            | "operation" | "operations"
            | "really" | "actually" | "essentially" | "basically"
    )
}

fn jaccard(a: &HashSet<String>, b: &HashSet<String>) -> f32 {
    if a.is_empty() && b.is_empty() {
        return 1.0;
    }
    let inter = a.intersection(b).count() as f32;
    let union = a.union(b).count() as f32;
    if union == 0.0 {
        0.0
    } else {
        inter / union
    }
}

/// Term-frequency cosine similarity over content stems. Two descriptions that
/// share the same dominant terms — even if one has 2× the filler — score very
/// close to 1.0; a description that drops the subject noun entirely tanks.
fn tf_cosine(a: &str, b: &str) -> f32 {
    use std::collections::HashMap;
    fn tf(s: &str) -> HashMap<String, f32> {
        let mut m: HashMap<String, f32> = HashMap::new();
        for w in s.split(|c: char| !c.is_alphanumeric()) {
            if w.len() <= 2 {
                continue;
            }
            let lower = w.to_ascii_lowercase();
            if is_stopword(&lower) {
                continue;
            }
            // Same crude stemmer as `stem_set_of_str` so the two metrics stay
            // aligned on what counts as the "same" token.
            let stemmed = {
                let mut out = lower;
                for suf in ["ing", "ed", "es", "s", "ly"] {
                    if out.ends_with(suf) && out.len() > suf.len() + 2 {
                        out.truncate(out.len() - suf.len());
                        break;
                    }
                }
                out
            };
            *m.entry(stemmed).or_insert(0.0) += 1.0;
        }
        m
    }
    let ta = tf(a);
    let tb = tf(b);
    if ta.is_empty() && tb.is_empty() {
        return 1.0;
    }
    let mut dot = 0.0_f32;
    let mut na2 = 0.0_f32;
    let mut nb2 = 0.0_f32;
    for v in ta.values() {
        na2 += v * v;
    }
    for v in tb.values() {
        nb2 += v * v;
    }
    for (k, va) in ta.iter() {
        if let Some(vb) = tb.get(k) {
            dot += va * vb;
        }
    }
    let denom = na2.sqrt() * nb2.sqrt();
    if denom == 0.0 {
        0.0
    } else {
        (dot / denom).clamp(0.0, 1.0)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    fn t(desc: &str) -> Tool {
        Tool {
            name: "x".into(),
            description: desc.into(),
            input_schema: json!({}),
        }
    }

    #[test]
    fn shrinks_a_bloated_description() {
        let bloated = "Please note that this tool is a tool that allows you to compute the sum \
                       of two numbers. Use this tool when you need to add numbers. Simply pass \
                       two numbers and you will get the sum back.";
        let opt = HeuristicOptimizer::new();
        let out = opt.rewrite(&t(bloated));
        // The aggressive rule set targets ≥40% reduction on the multi-tool
        // corpus; on a short example we just require a meaningful cut while
        // keeping the reported similarity above the optimizer's 0.7 floor.
        assert!(out.reduction_pct >= 25.0, "got {}%", out.reduction_pct);
        assert!(out.semantic_similarity >= 0.7, "similarity {}", out.semantic_similarity);
    }

    #[test]
    fn preserves_short_descriptions() {
        let opt = HeuristicOptimizer::new();
        let out = opt.rewrite(&t("Adds two numbers."));
        assert!(out.reduction_pct >= 0.0);
        assert!(out.semantic_similarity >= 0.9);
    }

    #[test]
    fn squashes_whitespace() {
        let opt = HeuristicOptimizer::new();
        let out = opt.rewrite(&t("hello    world"));
        assert_eq!(out.rewritten, "hello world");
    }

    #[test]
    fn dedupes_duplicate_sentences() {
        let opt = HeuristicOptimizer::new();
        let out = opt.rewrite(&t("Returns the user. Returns the user. Returns the user."));
        assert!(out.rewritten_tokens < out.original_tokens);
    }

    #[test]
    fn strips_markdown_emphasis() {
        let opt = HeuristicOptimizer::new();
        let out = opt.rewrite(&t("**Adds** _two_ numbers"));
        assert!(!out.rewritten.contains('*'));
        assert!(!out.rewritten.contains('_'));
    }

    #[test]
    fn jaccard_guardrail_holds() {
        let opt = HeuristicOptimizer::new();
        let big = "Search the customer database for matching contact records by full name, email \
                   address, phone number, mailing address, or any combination thereof. Please \
                   note that this tool is a tool that allows you to perform such a search.";
        let out = opt.rewrite(&t(big));
        // The internal guardrail enforces content-stem Jaccard ≥ 0.7 at every
        // stage; the reported cosine tracks it from above in practice, so if
        // this assertion ever fails the heuristic dropped meaning.
        assert!(
            out.semantic_similarity >= 0.7,
            "similarity {} dropped too low",
            out.semantic_similarity
        );
    }
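
    // Sanity checks on the similarity helpers themselves — small sketches
    // exercising the helpers directly rather than the full pipeline.
    #[test]
    fn metrics_agree_on_identical_text() {
        let a = "Adds two numbers";
        assert_eq!(jaccard(&stem_set(a), &stem_set(a)), 1.0);
        assert!(tf_cosine(a, a) > 0.999);
    }

    #[test]
    fn cosine_tanks_when_subject_is_dropped() {
        // The `tf_cosine` docs claim that losing the subject terms tanks the
        // score: dropping "Search invoices" should hurt far more than
        // dropping "by customer".
        let full = "Search invoices by customer";
        assert!(tf_cosine(full, "by customer") < tf_cosine(full, "Search invoices"));
    }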
}