Skip to main content

mcp_compressor_core/compression/
engine.rs

1//! `CompressionEngine` — pure, stateless formatter for tool listings and schemas.
2//!
3//! All methods are pure functions: no I/O, no side-effects, no async.
4//! This makes them trivially testable in isolation.
5//!
6//! # Format rules (mirrors Python `_format_tool_description`)
7//!
8//! | Level  | Output shape |
9//! |--------|--------------|
10//! | Max    | `<tool>name</tool>` |
11//! | High   | `<tool>name(arg1, arg2)</tool>` |
12//! | Medium | `<tool>name(arg1, arg2): First sentence of description</tool>` |
13//! | Low    | `<tool>name(arg1, arg2): Full description</tool>` |
14//!
15//! `format_listing` at `Max` always returns an empty string; the frontend server
16//! instead exposes a dedicated `list_tools` MCP tool for that level.
17
18use crate::compression::CompressionLevel;
19
20/// A single MCP tool as seen by the compression engine.
21///
22/// Mirrors `mcp.types.Tool` and `fastmcp.tools.Tool`.
23#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
24pub struct Tool {
25    /// Canonical tool name (e.g. `"get_confluence_page"`).
26    pub name: String,
27    /// Human-readable description.  May be absent for tools without docs.
28    pub description: Option<String>,
29    /// JSON Schema object describing the accepted input (the `properties` key
30    /// holds named parameters; `required` lists mandatory ones).
31    pub input_schema: serde_json::Value,
32}
33
34impl Tool {
35    /// Convenience constructor used in tests.
36    pub fn new(
37        name: impl Into<String>,
38        description: impl Into<Option<String>>,
39        input_schema: serde_json::Value,
40    ) -> Self {
41        Self {
42            name: name.into(),
43            description: description.into(),
44            input_schema,
45        }
46    }
47
48    /// Return the ordered list of parameter names from `input_schema.properties`.
49    pub fn param_names(&self) -> Vec<String> {
50        self.input_schema
51            .get("properties")
52            .and_then(serde_json::Value::as_object)
53            .map(|properties| properties.keys().cloned().collect())
54            .unwrap_or_default()
55    }
56}
57
58/// Stateless compression engine.
59///
60/// Instantiated with a [`CompressionLevel`]; all formatting calls borrow the
61/// tool slice from the caller rather than owning it.
62#[derive(Debug, Clone)]
63pub struct CompressionEngine {
64    level: CompressionLevel,
65}
66
67impl CompressionEngine {
68    pub fn new(level: CompressionLevel) -> Self {
69        Self { level }
70    }
71
72    /// Format the listing of *all* tools at the engine's compression level.
73    ///
74    /// Returns an empty string when the level is `Max` — callers should expose
75    /// a `list_tools` MCP tool instead.
76    /// Otherwise joins individual [`format_tool`] results with `"\n"`.
77    pub fn format_listing(&self, tools: &[Tool]) -> String {
78        if self.level == CompressionLevel::Max {
79            return String::new();
80        }
81
82        tools
83            .iter()
84            .map(|tool| self.format_tool(tool))
85            .collect::<Vec<_>>()
86            .join("\n")
87    }
88
89    /// Format a *single* tool at the engine's compression level.
90    ///
91    /// See module-level doc for the format rules.
92    pub fn format_tool(&self, tool: &Tool) -> String {
93        format_tool_at_level(tool, &self.level)
94    }
95
96    /// Look up a tool by name in the provided slice.
97    ///
98    /// Returns `None` when the name is not found.
99    pub fn get_schema<'a>(&self, tools: &'a [Tool], name: &str) -> Option<&'a Tool> {
100        tools.iter().find(|tool| tool.name == name)
101    }
102
103    /// Build the full schema response string for a tool.
104    ///
105    /// Always uses `Low` verbosity regardless of the engine's configured level —
106    /// schema lookup is meant to give complete information.
107    ///
108    /// Format:
109    /// ```text
110    /// <tool>name(arg1, arg2): Full description</tool>
111    ///
112    /// {
113    ///   "type": "object",
114    ///   "properties": { ... },
115    ///   ...
116    /// }
117    /// ```
118    pub fn format_schema_response(tool: &Tool) -> String {
119        let tool_description = format_tool_at_level(tool, &CompressionLevel::Low);
120        let schema = serde_json::to_string_pretty(&tool.input_schema)
121            .unwrap_or_else(|_| tool.input_schema.to_string());
122        format!("{tool_description}\n\n{schema}")
123    }
124}
125
126fn format_tool_at_level(tool: &Tool, level: &CompressionLevel) -> String {
127    match level {
128        CompressionLevel::Max => format!("<tool>{}</tool>", tool.name),
129        CompressionLevel::High => format!("<tool>{}({})</tool>", tool.name, format_args(tool)),
130        CompressionLevel::Medium => format_with_description(tool, first_sentence_description(tool)),
131        CompressionLevel::Low => format_with_description(tool, tool.description.as_deref()),
132    }
133}
134
135fn format_with_description(tool: &Tool, description: Option<&str>) -> String {
136    let signature = format!("{}({})", tool.name, format_args(tool));
137    match description.map(str::trim).filter(|description| !description.is_empty()) {
138        Some(description) => format!("<tool>{signature}: {description}</tool>"),
139        None => format!("<tool>{signature}</tool>"),
140    }
141}
142
143fn format_args(tool: &Tool) -> String {
144    tool.param_names().join(", ")
145}
146
147fn first_sentence_description(tool: &Tool) -> Option<&str> {
148    let description = tool.description.as_deref()?.trim();
149    let first_paragraph = description
150        .split("\n\n")
151        .map(str::trim)
152        .find(|paragraph| !paragraph.is_empty())
153        .unwrap_or(description);
154    let first_line = first_paragraph
155        .lines()
156        .map(str::trim)
157        .find(|line| !line.is_empty())
158        .unwrap_or(first_paragraph);
159    Some(first_line.split('.').next().unwrap_or_default().trim())
160}
161
162// ---------------------------------------------------------------------------
163// Tests
164// ---------------------------------------------------------------------------
165
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // ------------------------------------------------------------------
    // Fixtures
    // ------------------------------------------------------------------

    /// Shorthand: format `tool` with a fresh engine configured at `level`.
    fn render(level: CompressionLevel, tool: &Tool) -> String {
        CompressionEngine::new(level).format_tool(tool)
    }

    /// Two parameters (`url`, `timeout`) and a two-sentence description.
    fn fetch_tool() -> Tool {
        let schema = json!({
            "type": "object",
            "properties": {
                "url":     { "type": "string", "description": "Target URL" },
                "timeout": { "type": "number", "description": "Timeout in seconds" }
            },
            "required": ["url"]
        });
        Tool::new(
            "fetch",
            Some(String::from("Fetch a URL. Returns the raw content.")),
            schema,
        )
    }

    /// One parameter and a description spanning two lines.
    fn multiline_tool() -> Tool {
        let schema = json!({ "type": "object", "properties": { "x": { "type": "string" } } });
        Tool::new(
            "multiline",
            Some(String::from("First line description.\nSecond line continuation.")),
            schema,
        )
    }

    /// One parameter and no description at all.
    fn no_desc_tool() -> Tool {
        let schema = json!({ "type": "object", "properties": { "host": { "type": "string" } } });
        Tool::new("ping", None::<String>, schema)
    }

    /// A description but an empty `properties` object.
    fn no_args_tool() -> Tool {
        let schema = json!({ "type": "object", "properties": {} });
        Tool::new("health", Some(String::from("Check server health.")), schema)
    }

    // ------------------------------------------------------------------
    // format_tool — Max
    // ------------------------------------------------------------------

    /// Max renders the bare name: no arguments, no description.
    #[test]
    fn format_tool_max_name_only() {
        let rendered = render(CompressionLevel::Max, &fetch_tool());
        assert_eq!(rendered, "<tool>fetch</tool>");
    }

    /// Max output is unaffected by a missing description.
    #[test]
    fn format_tool_max_no_description() {
        let rendered = render(CompressionLevel::Max, &no_desc_tool());
        assert_eq!(rendered, "<tool>ping</tool>");
    }

    // ------------------------------------------------------------------
    // format_tool — High
    // ------------------------------------------------------------------

    /// High lists the arguments and leaves the description out.
    #[test]
    fn format_tool_high_name_and_args() {
        let rendered = render(CompressionLevel::High, &fetch_tool());
        assert_eq!(rendered, "<tool>fetch(url, timeout)</tool>");
    }

    /// High renders an empty pair of parentheses for a parameterless tool.
    #[test]
    fn format_tool_high_no_args() {
        let rendered = render(CompressionLevel::High, &no_args_tool());
        assert_eq!(rendered, "<tool>health()</tool>");
    }

    // ------------------------------------------------------------------
    // format_tool — Medium
    // ------------------------------------------------------------------

    /// Medium keeps only the first sentence:
    /// "Fetch a URL. Returns the raw content." becomes "Fetch a URL".
    #[test]
    fn format_tool_medium_first_sentence() {
        let rendered = render(CompressionLevel::Medium, &fetch_tool());
        assert_eq!(rendered, "<tool>fetch(url, timeout): Fetch a URL</tool>");
    }

    /// Medium looks at the first *line* only, then cuts at the first ".".
    #[test]
    fn format_tool_medium_first_line_of_multiline() {
        // "First line description.\nSecond line..." → first line → text before "."
        let rendered = render(CompressionLevel::Medium, &multiline_tool());
        assert_eq!(rendered, "<tool>multiline(x): First line description</tool>");
    }

    /// With a paragraph-structured description (including leading blank
    /// lines), Low keeps the later paragraph while Medium keeps only the
    /// first sentence of the first paragraph.
    #[test]
    fn format_tool_low_and_medium_differ_for_paragraph_descriptions() {
        let description =
            "\n\nSearch the web.\n\nLonger details that should only appear at low verbosity.";
        let tool = Tool::new("search", Some(description.to_string()), json!({}));

        let low_output = render(CompressionLevel::Low, &tool);
        let medium_output = render(CompressionLevel::Medium, &tool);

        assert!(low_output.contains("Longer details that should only appear at low verbosity"));
        assert_eq!(medium_output, "<tool>search(): Search the web</tool>");
    }

    /// Medium omits the `: description` suffix when there is no description.
    #[test]
    fn format_tool_medium_no_description() {
        let rendered = render(CompressionLevel::Medium, &no_desc_tool());
        assert_eq!(rendered, "<tool>ping(host)</tool>");
    }

    // ------------------------------------------------------------------
    // format_tool — Low
    // ------------------------------------------------------------------

    /// Low includes the whole description.
    #[test]
    fn format_tool_low_full_description() {
        let rendered = render(CompressionLevel::Low, &fetch_tool());
        assert_eq!(
            rendered,
            "<tool>fetch(url, timeout): Fetch a URL. Returns the raw content.</tool>",
        );
    }

    /// Low keeps every line of a multi-line description.
    #[test]
    fn format_tool_low_multiline_description_kept() {
        let rendered = render(CompressionLevel::Low, &multiline_tool());
        for expected_fragment in ["First line description.", "Second line continuation."] {
            assert!(rendered.contains(expected_fragment));
        }
    }

    /// Low renders an empty pair of parentheses for a parameterless tool.
    #[test]
    fn format_tool_low_no_args() {
        let rendered = render(CompressionLevel::Low, &no_args_tool());
        assert_eq!(rendered, "<tool>health(): Check server health.</tool>");
    }

    // ------------------------------------------------------------------
    // format_listing
    // ------------------------------------------------------------------

    /// Max listings are always empty, whatever tools are passed in.
    /// (The frontend server registers a list_tools MCP tool instead.)
    #[test]
    fn format_listing_max_returns_empty() {
        let engine = CompressionEngine::new(CompressionLevel::Max);
        let listing = engine.format_listing(&[fetch_tool(), no_desc_tool()]);
        assert_eq!(listing, "");
    }

    /// Every non-Max level yields an empty string for an empty slice.
    #[test]
    fn format_listing_empty_tools() {
        for level in [CompressionLevel::Low, CompressionLevel::Medium, CompressionLevel::High] {
            let listing = CompressionEngine::new(level).format_listing(&[]);
            assert_eq!(listing, "");
        }
    }

    /// Entries are separated by a single newline, one tool per line.
    #[test]
    fn format_listing_multiple_tools_joined_with_newline() {
        let tools = vec![fetch_tool(), no_args_tool()];
        let listing = CompressionEngine::new(CompressionLevel::High).format_listing(&tools);
        let lines = listing.lines().collect::<Vec<&str>>();
        assert_eq!(lines, ["<tool>fetch(url, timeout)</tool>", "<tool>health()</tool>"]);
    }

    /// A one-tool listing does not end with a newline.
    #[test]
    fn format_listing_single_tool_no_trailing_newline() {
        let listing = CompressionEngine::new(CompressionLevel::High).format_listing(&[fetch_tool()]);
        assert!(!listing.ends_with('\n'));
    }

    // ------------------------------------------------------------------
    // get_schema
    // ------------------------------------------------------------------

    /// A matching name yields a reference to that tool.
    #[test]
    fn get_schema_found() {
        let engine = CompressionEngine::new(CompressionLevel::Medium);
        let tools = vec![fetch_tool()];
        let found = engine.get_schema(&tools, "fetch");
        assert_eq!(found.map(|tool| tool.name.as_str()), Some("fetch"));
    }

    /// An unknown name yields `None`.
    #[test]
    fn get_schema_not_found() {
        let engine = CompressionEngine::new(CompressionLevel::Medium);
        let tools = vec![fetch_tool()];
        let found = engine.get_schema(&tools, "nonexistent");
        assert!(found.is_none());
    }

    /// Looking anything up in an empty slice yields `None`.
    #[test]
    fn get_schema_empty_list() {
        let engine = CompressionEngine::new(CompressionLevel::Medium);
        let found = engine.get_schema(&[], "fetch");
        assert!(found.is_none());
    }

    // ------------------------------------------------------------------
    // format_schema_response
    // ------------------------------------------------------------------

    /// The response starts from the Low-verbosity rendering of the tool.
    #[test]
    fn format_schema_response_contains_low_description() {
        let response = CompressionEngine::format_schema_response(&fetch_tool());
        assert!(response.contains("<tool>fetch(url, timeout):"), "got: {response}");
        assert!(response.contains("Fetch a URL. Returns the raw content."));
    }

    /// The response embeds the JSON input schema.
    #[test]
    fn format_schema_response_contains_json_schema() {
        let response = CompressionEngine::format_schema_response(&fetch_tool());
        assert!(response.contains("\"properties\""), "got: {response}");
        assert!(response.contains("\"url\""));
    }

    /// Description and schema are separated by a blank line.
    #[test]
    fn format_schema_response_blank_line_separator() {
        let response = CompressionEngine::format_schema_response(&fetch_tool());
        assert!(response.contains("\n\n"), "expected blank-line separator, got: {response}");
    }

    // ------------------------------------------------------------------
    // param_names
    // ------------------------------------------------------------------

    /// Parameter names come back in schema declaration order.
    #[test]
    fn param_names_returns_ordered_params() {
        // "url" is declared before "timeout" in the fixture schema.
        let names = fetch_tool().param_names();
        assert_eq!(names, vec!["url", "timeout"]);
    }

    /// An empty `properties` object yields no parameter names.
    #[test]
    fn param_names_empty_schema() {
        assert!(no_args_tool().param_names().is_empty());
    }
}