Skip to main content

forge_guardrails/
schema_compression.rs

//! Tool schema description compression for proxy-intercepted requests.
//!
//! Minification is conservative: it only touches description strings in tool
//! schemas (top-level `ToolSpec.description` and every `"description"` string
//! value inside the JSON Schema). Names, parameter names, types, `required`
//! arrays, and all other schema structure are never modified; the one
//! structural change is that a `"description"` key whose value is empty (or
//! whitespace-only) is removed from the JSON Schema.
7
8use std::str::FromStr;
9
10use serde_json::Value;
11
12use crate::core::tool_spec::ToolSpec;
13
/// Selects the compression applied to tool schema descriptions before a
/// request is forwarded upstream. The default is `Disabled`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum SchemaCompressionMode {
    /// Leave every description exactly as received. Default.
    #[default]
    Disabled,
    /// Whitespace-only minification: strip trailing whitespace from each
    /// line, keep at most one blank line in a row, squeeze runs of spaces and
    /// tabs down to a single space (outside fenced code blocks), and remove
    /// `"description": ""` keys.
    Minify,
}

impl SchemaCompressionMode {
    /// Canonical lowercase name of the mode, as accepted by `from_str`.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Minify => "minify",
            Self::Disabled => "disabled",
        }
    }
}

impl FromStr for SchemaCompressionMode {
    type Err = String;

    /// Parses a mode name, ignoring surrounding whitespace and ASCII case.
    ///
    /// # Errors
    ///
    /// Returns a message quoting the original (untrimmed) input and listing
    /// the accepted names when the input matches no mode.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let wanted = s.trim();
        // Match against the canonical names so parsing and `as_str` agree.
        [Self::Disabled, Self::Minify]
            .iter()
            .copied()
            .find(|mode| wanted.eq_ignore_ascii_case(mode.as_str()))
            .ok_or_else(|| {
                format!("unknown schema compression mode '{s}'; expected disabled or minify")
            })
    }
}
50
/// Counters describing what a single schema compression pass did.
#[derive(Clone, Debug, Default)]
pub struct SchemaCompressionStats {
    /// How many description fields were rewritten to a different string.
    pub descriptions_changed: usize,
    /// How many empty (or whitespace-only) description fields were removed.
    pub descriptions_dropped: usize,
}
59
60/// Minify tool schema descriptions in place. Returns stats on what changed.
61///
62/// Transforms apply to `spec.description` and every `"description"` string
63/// inside `spec.json_schema` up to recursion depth 32. Tool names, parameter
64/// names, types, and `required` arrays are untouched.
65pub fn compress_tool_schemas(
66    specs: &mut [ToolSpec],
67    mode: SchemaCompressionMode,
68) -> SchemaCompressionStats {
69    let mut stats = SchemaCompressionStats::default();
70    if mode == SchemaCompressionMode::Disabled {
71        return stats;
72    }
73    for spec in specs.iter_mut() {
74        if let Some(minified) = minify_description(&spec.description) {
75            spec.description = minified;
76            stats.descriptions_changed += 1;
77        }
78        if let Some(json_schema) = spec.json_schema.as_mut() {
79            let (c, d) = minify_schema_descriptions(json_schema, 0);
80            stats.descriptions_changed += c;
81            stats.descriptions_dropped += d;
82        }
83    }
84    stats
85}
86
87/// Minify tool schema descriptions in a raw Anthropic request body.
88///
89/// Walks `body["tools"][*]["description"]` and
90/// `body["tools"][*]["input_schema"]` and applies the same transforms as
91/// `compress_tool_schemas`. Returns true if any description was changed or
92/// dropped. Both paths share `minify_description`, so the resulting strings
93/// are byte-identical.
94pub fn patch_anthropic_tool_schemas(body: &mut Value, mode: SchemaCompressionMode) -> bool {
95    if mode == SchemaCompressionMode::Disabled {
96        return false;
97    }
98    let Some(tools) = body.get_mut("tools").and_then(Value::as_array_mut) else {
99        return false;
100    };
101    let mut any_changed = false;
102    for tool in tools.iter_mut() {
103        let Some(obj) = tool.as_object_mut() else {
104            continue;
105        };
106        if let Some(Value::String(desc)) = obj.get_mut("description") {
107            if let Some(minified) = minify_description(desc) {
108                *desc = minified;
109                any_changed = true;
110            }
111        }
112        if let Some(schema) = obj.get_mut("input_schema") {
113            let (c, d) = minify_schema_descriptions(schema, 0);
114            if c + d > 0 {
115                any_changed = true;
116            }
117        }
118    }
119    any_changed
120}
121
/// Deepest nesting level (the schema root is level 0) that
/// `minify_schema_descriptions` still visits; values nested deeper than this
/// are left untouched.
const MAX_SCHEMA_RECURSION_DEPTH: usize = 32;
123
124/// Returns `Some(minified)` if the text was changed, `None` if already clean.
125pub(crate) fn minify_description(desc: &str) -> Option<String> {
126    let result = apply_minify(desc);
127    if result == desc {
128        None
129    } else {
130        Some(result)
131    }
132}
133
/// Core minifier shared by every description path.
///
/// The text is processed line by line (via `str::lines`, so `\r\n` endings
/// and a final trailing newline are normalised away) and re-joined with `\n`:
///
/// * trailing whitespace is stripped from every line;
/// * a line whose first non-whitespace characters are ```` ``` ```` toggles
///   "fenced code" mode and is emitted with its indentation intact. The fence
///   may be indented (for example inside a list item);
/// * lines inside a fence keep their leading and internal whitespace, and
///   blank lines inside a fence are not collapsed;
/// * outside a fence, leading spaces/tabs are removed, internal space/tab
///   runs become one space, and runs of blank lines shrink to one;
/// * blank lines at the very end are dropped.
///
/// Returns the minified text; an all-blank input yields an empty string.
fn apply_minify(desc: &str) -> String {
    // The output is never longer than the input, so one allocation suffices.
    let mut out = String::with_capacity(desc.len());
    let mut in_fence = false;
    // Blank lines seen since the last emitted line. They are written lazily,
    // just before the next non-blank line, so trailing blanks vanish for free.
    let mut pending_blanks = 0usize;
    let mut wrote_line = false;

    for raw_line in desc.lines() {
        let line = raw_line.trim_end();

        if line.is_empty() {
            // Inside a fence every blank line is kept; outside, a run of
            // blank lines counts as a single one.
            pending_blanks = if in_fence { pending_blanks + 1 } else { 1 };
            continue;
        }

        // Separator after the previous line, then any owed blank lines.
        if wrote_line {
            out.push('\n');
        }
        for _ in 0..pending_blanks {
            out.push('\n');
        }
        pending_blanks = 0;
        wrote_line = true;

        if line.trim_start().starts_with("```") {
            // Fence boundary: flip the mode and pass the marker through.
            in_fence = !in_fence;
            out.push_str(line);
        } else if in_fence {
            // Code content: only the trailing whitespace was removed.
            out.push_str(line);
        } else {
            out.push_str(&collapse_internal_whitespace(line));
        }
    }

    out
}

/// Collapse every run of spaces/tabs in `line` to a single space and drop
/// any run at the very start of the line.
///
/// A run at the end of the line is reduced to one space rather than removed;
/// `apply_minify` strips trailing whitespace before calling this.
fn collapse_internal_whitespace(line: &str) -> String {
    let mut collapsed = String::with_capacity(line.len());
    let mut prev_was_blank = false;
    for ch in line.chars() {
        let is_blank = matches!(ch, ' ' | '\t');
        if !is_blank {
            collapsed.push(ch);
        } else if !prev_was_blank && !collapsed.is_empty() {
            // First blank of a run that follows real text.
            collapsed.push(' ');
        }
        prev_was_blank = is_blank;
    }
    collapsed
}
190
191/// Walk a JSON value and minify every `"description"` string key.
192/// Returns (changed_count, dropped_count).
193fn minify_schema_descriptions(value: &mut Value, depth: usize) -> (usize, usize) {
194    if depth > MAX_SCHEMA_RECURSION_DEPTH {
195        return (0, 0);
196    }
197    let mut changed = 0usize;
198    let mut dropped = 0usize;
199    match value {
200        Value::Object(obj) => {
201            // Compute the new value (or drop) for "description" before mutating.
202            let desc_action = obj.get("description").and_then(Value::as_str).map(|d| {
203                if d.is_empty() {
204                    None // drop
205                } else {
206                    let m = apply_minify(d);
207                    if m.is_empty() {
208                        None // drop
209                    } else if m != d {
210                        Some(m) // replace
211                    } else {
212                        Some(d.to_string()) // unchanged — will not count
213                    }
214                }
215            });
216            match desc_action {
217                Some(None) => {
218                    obj.remove("description");
219                    dropped += 1;
220                }
221                Some(Some(ref new_val)) => {
222                    // Only count as changed if the value actually differs.
223                    let was_changed = obj
224                        .get("description")
225                        .and_then(Value::as_str)
226                        .is_some_and(|old| old != new_val.as_str());
227                    if was_changed {
228                        obj.insert("description".to_string(), Value::String(new_val.clone()));
229                        changed += 1;
230                    }
231                }
232                None => {} // key absent — nothing to do
233            }
234            // Recurse into all other values (collect keys first to avoid borrow issues).
235            let keys: Vec<String> = obj.keys().cloned().collect();
236            for key in keys {
237                if let Some(v) = obj.get_mut(&key) {
238                    let (c, d) = minify_schema_descriptions(v, depth + 1);
239                    changed += c;
240                    dropped += d;
241                }
242            }
243        }
244        Value::Array(arr) => {
245            for v in arr.iter_mut() {
246                let (c, d) = minify_schema_descriptions(v, depth + 1);
247                changed += c;
248                dropped += d;
249            }
250        }
251        _ => {}
252    }
253    (changed, dropped)
254}
255
/// Unit tests for mode parsing, the text minifier, and both schema entry
/// points (`compress_tool_schemas` and `patch_anthropic_tool_schemas`).
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a minimal `ToolSpec` named `tool` with the given description
    /// and optional JSON Schema.
    fn spec_with(description: &str, json_schema: Option<Value>) -> ToolSpec {
        use crate::core::tool_spec::param_model::ParamModel;
        ToolSpec {
            name: "tool".to_string(),
            description: description.to_string(),
            parameters: ParamModel::Object {
                description: None,
                required: true,
                properties: Default::default(),
            },
            json_schema,
        }
    }

    #[test]
    fn schema_mode_from_str_roundtrips() {
        // Every mode must parse back from its own canonical name.
        for mode in [SchemaCompressionMode::Disabled, SchemaCompressionMode::Minify] {
            assert_eq!(SchemaCompressionMode::from_str(mode.as_str()).unwrap(), mode);
        }
        assert_eq!(
            SchemaCompressionMode::from_str("MINIFY").unwrap(),
            SchemaCompressionMode::Minify
        );
        assert_eq!(
            SchemaCompressionMode::from_str("  disabled ").unwrap(),
            SchemaCompressionMode::Disabled
        );
    }

    #[test]
    fn schema_mode_invalid_returns_err() {
        let err = SchemaCompressionMode::from_str("gzip").unwrap_err();
        assert!(err.contains("gzip"), "error should mention the bad input");
        assert!(
            err.contains("disabled") || err.contains("minify"),
            "error should list valid options"
        );
    }

    #[test]
    fn minify_collapses_internal_whitespace() {
        assert_eq!(
            minify_description("  hello   world  ").unwrap(),
            "hello world"
        );
    }

    #[test]
    fn minify_trims_trailing_whitespace() {
        assert_eq!(
            minify_description("hello   \nworld   ").unwrap(),
            "hello\nworld"
        );
    }

    #[test]
    fn minify_collapses_excess_blank_lines() {
        // A run of blank lines collapses to exactly one blank line.
        assert_eq!(
            minify_description("a\n\n\n\nb").unwrap(),
            "a\n\nb",
            "consecutive blank lines should collapse to a single blank line"
        );
    }

    #[test]
    fn minify_preserves_fenced_code_block_content() {
        let desc = "Before:\n```\n  preserved   whitespace  \n```\nAfter";
        // Only the trailing whitespace inside the fence may change, so the
        // input is guaranteed to be reported as changed.
        let result = minify_description(desc).expect("trailing whitespace should be trimmed");
        assert_eq!(
            result, "Before:\n```\n  preserved   whitespace\n```\nAfter",
            "fenced content must not be collapsed"
        );
    }

    #[test]
    fn minify_idempotent() {
        let desc = "  hello   world  \n\n\n\nline2   ";
        let once = apply_minify(desc);
        let twice = apply_minify(&once);
        assert_eq!(once, twice, "minification must be idempotent");
    }

    #[test]
    fn minify_unchanged_returns_none() {
        assert_eq!(minify_description("already clean"), None);
    }

    #[test]
    fn compress_tool_schemas_disabled_noop() {
        let original_desc = "  A   tool  ";
        let mut specs = vec![spec_with(original_desc, None)];
        let stats = compress_tool_schemas(&mut specs, SchemaCompressionMode::Disabled);
        assert_eq!(specs[0].description, original_desc);
        assert_eq!(stats.descriptions_changed, 0);
        assert_eq!(stats.descriptions_dropped, 0);
    }

    #[test]
    fn compress_tool_schemas_minifies_descriptions() {
        let mut specs = vec![spec_with(
            "  A   tool  ",
            Some(json!({
                "properties": {
                    "param": {
                        "type": "string",
                        "description": "  A   param  "
                    }
                }
            })),
        )];
        let stats = compress_tool_schemas(&mut specs, SchemaCompressionMode::Minify);
        assert_eq!(specs[0].description, "A tool");
        // One top-level description plus one nested description.
        assert_eq!(stats.descriptions_changed, 2);
        assert_eq!(stats.descriptions_dropped, 0);
        let pdesc = specs[0].json_schema.as_ref().unwrap()["properties"]["param"]["description"]
            .as_str()
            .unwrap();
        assert_eq!(pdesc, "A param");
    }

    #[test]
    fn compress_tool_schemas_drops_empty_schema_descriptions() {
        let mut specs = vec![spec_with(
            "clean",
            Some(json!({
                "description": "",
                "properties": {
                    "param": { "type": "string", "description": "   " }
                }
            })),
        )];
        let stats = compress_tool_schemas(&mut specs, SchemaCompressionMode::Minify);
        assert_eq!(stats.descriptions_changed, 0);
        assert_eq!(stats.descriptions_dropped, 2);
        let schema = specs[0].json_schema.as_ref().unwrap();
        assert!(schema.get("description").is_none());
        assert!(schema["properties"]["param"].get("description").is_none());
        // Non-description structure must survive untouched.
        assert_eq!(schema["properties"]["param"]["type"], "string");
    }

    #[test]
    fn patch_anthropic_disabled_noop() {
        let mut body = json!({
            "tools": [{"name": "bash", "description": "  Run   a   command  "}]
        });
        let changed = patch_anthropic_tool_schemas(&mut body, SchemaCompressionMode::Disabled);
        assert!(!changed);
        assert_eq!(
            body["tools"][0]["description"].as_str().unwrap(),
            "  Run   a   command  "
        );
    }

    #[test]
    fn patch_anthropic_minifies_descriptions() {
        let mut body = json!({
            "tools": [{
                "name": "bash",
                "description": "  Run   a   command  ",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "cmd": {
                            "type": "string",
                            "description": "  The   command  "
                        }
                    }
                }
            }]
        });
        let changed = patch_anthropic_tool_schemas(&mut body, SchemaCompressionMode::Minify);
        assert!(changed);
        assert_eq!(
            body["tools"][0]["description"].as_str().unwrap(),
            "Run a command"
        );
        assert_eq!(
            body["tools"][0]["input_schema"]["properties"]["cmd"]["description"]
                .as_str()
                .unwrap(),
            "The command"
        );
    }

    #[test]
    fn patch_anthropic_clean_body_reports_no_change() {
        let mut body = json!({
            "tools": [{
                "name": "bash",
                "description": "Run a command",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "cmd": { "type": "string", "description": "The command" }
                    }
                }
            }]
        });
        let before = body.clone();
        let changed = patch_anthropic_tool_schemas(&mut body, SchemaCompressionMode::Minify);
        assert!(!changed);
        assert_eq!(body, before);
    }

    #[test]
    fn toolspec_and_anthropic_paths_byte_identical() {
        let raw = "  Runs   a   shell   command  ";
        let via_toolspec = minify_description(raw).unwrap_or_else(|| raw.to_string());
        let mut body = json!({"tools": [{"name": "bash", "description": raw}]});
        patch_anthropic_tool_schemas(&mut body, SchemaCompressionMode::Minify);
        let via_anthropic = body["tools"][0]["description"].as_str().unwrap();
        assert_eq!(
            via_toolspec, via_anthropic,
            "ToolSpec path and Anthropic body path must be byte-identical"
        );
    }
}