Skip to main content

mcpr_core/protocol/
schema.rs

1//! MCP schema capture: types, pagination merging, and diff logic.
2//!
3//! This module understands the structure of MCP discovery responses
4//! (`initialize`, `tools/list`, `resources/list`, `prompts/list`,
5//! `resources/templates/list`) and provides:
6//!
7//! - **Pagination detection**: Determine if a response is a single page or
8//!   part of a paginated sequence (MCP cursor-based pagination).
9//! - **Page merging**: Combine paginated responses into a single snapshot.
10//! - **Schema diffing**: Compare two snapshots to detect added, removed,
11//!   and modified items (tools, resources, prompts).
12//!
13//! This is pure protocol logic — no HTTP, no storage, no hashing.
14//! The proxy and storage layers consume these functions.
15
16use std::collections::HashMap;
17
18use serde::Serialize;
19use serde_json::Value;
20
21use super::McpMethod;
22
23// ── Types ────────────────────────────────────────────────────────────
24
/// Pagination state for an MCP list response.
///
/// Determined by checking `params.cursor` in the request and
/// `result.nextCursor` in the response; see [`detect_page_status`] for
/// the exact mapping.
///
/// With the `rename_all = "snake_case"` attribute below, variant names are
/// serialized in snake_case (e.g. `FirstPage` becomes `first_page`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum PageStatus {
    /// Single-page response: no cursor in the request and no `nextCursor`
    /// in the response. This is the common path.
    Complete,
    /// First page of a paginated response: no cursor in the request, but
    /// the response carries a `nextCursor`.
    FirstPage,
    /// Middle page: the request carries a cursor and the response carries
    /// a `nextCursor`.
    MiddlePage,
    /// Last page: the request carries a cursor and the response has no
    /// `nextCursor`.
    LastPage,
}
41
/// Result of diffing two schema snapshots for a single MCP method.
///
/// One value describes one change. [`diff_schema`] returns a list of
/// these, one per added, removed, or modified item, or a single bulk
/// record when no per-item change could be identified.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SchemaDiff {
    /// Type of change: "tool_added", "tool_removed", "tool_modified",
    /// "resource_added", "prompt_modified", "updated", etc.
    ///
    /// Per-item values produced by [`diff_schema`] have the form
    /// `<item type>_<added|modified|removed>`; the bulk value it produces
    /// is "updated".
    pub change_type: String,
    /// Name of the affected item (e.g., "search_products"). None for
    /// bulk changes like "updated" or "initial".
    ///
    /// NOTE(review): "initial" is never produced in this module —
    /// presumably it is set by a caller; confirm.
    pub item_name: Option<String>,
}
52
53// ── Public functions ─────────────────────────────────────────────────
54
55/// Check if an MCP method is a schema discovery method whose response
56/// should be captured.
57pub fn is_schema_method(method: &McpMethod) -> bool {
58    matches!(
59        method,
60        McpMethod::Initialize
61            | McpMethod::ToolsList
62            | McpMethod::ResourcesList
63            | McpMethod::ResourcesTemplatesList
64            | McpMethod::PromptsList
65    )
66}
67
68/// Determine pagination status from the request body and response body.
69///
70/// MCP pagination uses cursor-based paging:
71/// - Request `params.cursor` present → continuing from a previous page.
72/// - Response `result.nextCursor` present → more pages available.
73pub fn detect_page_status(request_body: &Value, response_body: &Value) -> PageStatus {
74    let req_has_cursor = request_body
75        .get("params")
76        .and_then(|p| p.get("cursor"))
77        .and_then(|c| c.as_str())
78        .is_some();
79
80    let resp_has_next_cursor = response_body
81        .get("result")
82        .and_then(|r| r.get("nextCursor"))
83        .and_then(|c| c.as_str())
84        .is_some();
85
86    match (req_has_cursor, resp_has_next_cursor) {
87        (false, false) => PageStatus::Complete,
88        (false, true) => PageStatus::FirstPage,
89        (true, true) => PageStatus::MiddlePage,
90        (true, false) => PageStatus::LastPage,
91    }
92}
93
94/// Merge paginated list responses into a single combined `result` payload.
95///
96/// Each page is the `result` field from a JSON-RPC response. This function
97/// merges the array field (tools, resources, resourceTemplates, prompts)
98/// across all pages into a single value.
99///
100/// Returns `None` if pages is empty or the method has no array key.
101pub fn merge_pages(method: &str, pages: &[Value]) -> Option<Value> {
102    if pages.is_empty() {
103        return None;
104    }
105
106    // List methods (tools/list, resources/list, …) must extract only the
107    // named array so per-request metadata (`_meta`, server-generated
108    // request ids, etc.) does not leak into the hash and produce
109    // phantom versions. Non-list methods (initialize) retain the raw
110    // page — they have no array to project.
111    let Some(array_key) = method_array_key(method) else {
112        return (pages.len() == 1).then(|| pages[0].clone());
113    };
114
115    let mut merged_array: Vec<Value> = Vec::new();
116    for page in pages {
117        if let Some(arr) = page.get(array_key).and_then(|a| a.as_array()) {
118            merged_array.extend(arr.iter().cloned());
119        }
120    }
121
122    Some(serde_json::json!({ array_key: merged_array }))
123}
124
125/// Diff two schema payloads for a list method.
126///
127/// Compares named items (by their `name` field) and returns granular
128/// changes: added, removed, and modified items.
129///
130/// For methods without named items (e.g., `initialize`), returns a
131/// single "updated" diff if the payloads differ.
132pub fn diff_schema(method: &str, old_payload: &Value, new_payload: &Value) -> Vec<SchemaDiff> {
133    let array_key = match method_array_key(method) {
134        Some(key) => key,
135        None => {
136            // Non-list method (e.g., initialize) — no granular diff.
137            return vec![SchemaDiff {
138                change_type: "updated".to_string(),
139                item_name: None,
140            }];
141        }
142    };
143
144    let item_type = method_item_type(method);
145    let old_items = extract_named_items(old_payload, array_key);
146    let new_items = extract_named_items(new_payload, array_key);
147
148    let mut changes = Vec::new();
149
150    // Find added and modified items.
151    for (name, new_val) in &new_items {
152        match old_items.get(name) {
153            None => changes.push(SchemaDiff {
154                change_type: format!("{item_type}_added"),
155                item_name: Some(name.clone()),
156            }),
157            Some(old_val) if old_val != new_val => changes.push(SchemaDiff {
158                change_type: format!("{item_type}_modified"),
159                item_name: Some(name.clone()),
160            }),
161            _ => {} // unchanged
162        }
163    }
164
165    // Find removed items.
166    for name in old_items.keys() {
167        if !new_items.contains_key(name) {
168            changes.push(SchemaDiff {
169                change_type: format!("{item_type}_removed"),
170                item_name: Some(name.clone()),
171            });
172        }
173    }
174
175    if changes.is_empty() {
176        // Hash changed but no named items differ — structural change.
177        changes.push(SchemaDiff {
178            change_type: "updated".to_string(),
179            item_name: None,
180        });
181    }
182
183    changes
184}
185
186// ── Internal helpers ─────────────────────────────────────────────────
187
/// Look up the key of the item array inside a list method's `result`
/// payload.
///
/// Returns `None` for any method that is not one of the four known list
/// methods. Matching is exact and case-sensitive.
fn method_array_key(method: &str) -> Option<&'static str> {
    // (method name, array key in the result payload)
    const ARRAY_KEYS: [(&str, &str); 4] = [
        ("tools/list", "tools"),
        ("resources/list", "resources"),
        ("resources/templates/list", "resourceTemplates"),
        ("prompts/list", "prompts"),
    ];

    ARRAY_KEYS
        .iter()
        .find(|(list_method, _)| *list_method == method)
        .map(|&(_, key)| key)
}
198
/// Look up the item-type label for a list method.
///
/// The label is the prefix of per-item change types such as
/// "tool_added" or "resource_removed". Methods that are not one of the
/// four known list methods get the generic label "item".
fn method_item_type(method: &str) -> &'static str {
    // (method name, label used in change records)
    const ITEM_TYPES: [(&str, &str); 4] = [
        ("tools/list", "tool"),
        ("resources/list", "resource"),
        ("resources/templates/list", "resource_template"),
        ("prompts/list", "prompt"),
    ];

    ITEM_TYPES
        .iter()
        .find(|(list_method, _)| *list_method == method)
        .map_or("item", |&(_, label)| label)
}
210
211/// Extract named items from a list payload as a map of name → JSON string.
212///
213/// MCP list items (tools, resources, prompts) have a `name` field that
214/// serves as a stable identifier for diffing.
215fn extract_named_items(payload: &Value, array_key: &str) -> HashMap<String, String> {
216    let mut map = HashMap::new();
217    if let Some(arr) = payload.get(array_key).and_then(|a| a.as_array()) {
218        for item in arr {
219            if let Some(name) = item.get("name").and_then(|n| n.as_str()) {
220                map.insert(name.to_string(), item.to_string());
221            }
222        }
223    }
224    map
225}
226
227// ── Tests ────────────────────────────────────────────────────────────
228
// Unit tests for the schema helpers above. Test names follow a
// `function__scenario` pattern; the double underscore is why the
// `non_snake_case` lint is allowed for this module.
#[cfg(test)]
#[allow(non_snake_case)]
mod tests {
    use super::*;
    use serde_json::json;

    // ── is_schema_method ─────────────────────────────────────────────

    // Every discovery method must be recognized.
    #[test]
    fn is_schema_method__matches_discovery() {
        assert!(is_schema_method(&McpMethod::Initialize));
        assert!(is_schema_method(&McpMethod::ToolsList));
        assert!(is_schema_method(&McpMethod::ResourcesList));
        assert!(is_schema_method(&McpMethod::ResourcesTemplatesList));
        assert!(is_schema_method(&McpMethod::PromptsList));
    }

    // Calls, reads, pings, and notifications are not schema discovery.
    #[test]
    fn is_schema_method__rejects_non_discovery() {
        assert!(!is_schema_method(&McpMethod::ToolsCall));
        assert!(!is_schema_method(&McpMethod::ResourcesRead));
        assert!(!is_schema_method(&McpMethod::PromptsGet));
        assert!(!is_schema_method(&McpMethod::Ping));
        assert!(!is_schema_method(&McpMethod::Initialized));
        assert!(!is_schema_method(&McpMethod::NotificationsToolsListChanged));
    }

    // ── detect_page_status ───────────────────────────────────────────

    // No cursor in the request, no nextCursor in the response.
    #[test]
    fn detect_page_status__complete() {
        let req = json!({"method": "tools/list"});
        let resp = json!({"result": {"tools": []}});
        assert_eq!(detect_page_status(&req, &resp), PageStatus::Complete);
    }

    // No cursor in the request, nextCursor in the response.
    #[test]
    fn detect_page_status__first_page() {
        let req = json!({"method": "tools/list"});
        let resp = json!({"result": {"tools": [], "nextCursor": "abc"}});
        assert_eq!(detect_page_status(&req, &resp), PageStatus::FirstPage);
    }

    // Cursor in the request and nextCursor in the response.
    #[test]
    fn detect_page_status__middle_page() {
        let req = json!({"method": "tools/list", "params": {"cursor": "abc"}});
        let resp = json!({"result": {"tools": [], "nextCursor": "def"}});
        assert_eq!(detect_page_status(&req, &resp), PageStatus::MiddlePage);
    }

    // Cursor in the request, no nextCursor in the response.
    #[test]
    fn detect_page_status__last_page() {
        let req = json!({"method": "tools/list", "params": {"cursor": "abc"}});
        let resp = json!({"result": {"tools": []}});
        assert_eq!(detect_page_status(&req, &resp), PageStatus::LastPage);
    }

    // ── merge_pages ──────────────────────────────────────────────────

    // A single page holding only the array key comes back unchanged.
    #[test]
    fn merge_pages__single() {
        let page = json!({"tools": [{"name": "a"}]});
        let result = merge_pages("tools/list", &[page.clone()]);
        assert_eq!(result, Some(page));
    }

    // Arrays are concatenated in page order.
    #[test]
    fn merge_pages__two_pages() {
        let p1 = json!({"tools": [{"name": "a"}]});
        let p2 = json!({"tools": [{"name": "b"}]});
        let result = merge_pages("tools/list", &[p1, p2]).unwrap();
        let tools = result["tools"].as_array().unwrap();
        assert_eq!(tools.len(), 2);
        assert_eq!(tools[0]["name"], "a");
        assert_eq!(tools[1]["name"], "b");
    }

    // Merging uses the array key that belongs to the method.
    #[test]
    fn merge_pages__resources() {
        let p1 = json!({"resources": [{"name": "r1", "uri": "file://a"}]});
        let p2 = json!({"resources": [{"name": "r2", "uri": "file://b"}]});
        let result = merge_pages("resources/list", &[p1, p2]).unwrap();
        assert_eq!(result["resources"].as_array().unwrap().len(), 2);
    }

    // No pages means nothing to merge.
    #[test]
    fn merge_pages__empty() {
        let result = merge_pages("tools/list", &[]);
        assert_eq!(result, None);
    }

    #[test]
    fn merge_pages__single_strips_volatile_metadata() {
        // Regression: Study Kit upstream returned 38 tools but produced 138
        // schema versions because the single-page branch kept the whole
        // raw result, including `_meta` / `serverInfo` fields that the
        // server regenerates per request.
        let p1 = json!({
            "tools": [{"name": "a"}],
            "_meta": {"requestId": "req-1"},
            "serverInfo": {"generatedAt": "2026-04-19T00:00:00Z"}
        });
        let p2 = json!({
            "tools": [{"name": "a"}],
            "_meta": {"requestId": "req-2"},
            "serverInfo": {"generatedAt": "2026-04-19T00:00:05Z"}
        });
        let r1 = merge_pages("tools/list", &[p1]).unwrap();
        let r2 = merge_pages("tools/list", &[p2]).unwrap();
        assert_eq!(r1, r2, "per-request metadata must not reach the hash");
        assert_eq!(r1, json!({"tools": [{"name": "a"}]}));
    }

    // A list-method page without its array still produces the array key,
    // with an empty array.
    #[test]
    fn merge_pages__single_missing_array_key_yields_empty_array() {
        let p1 = json!({"_meta": {"requestId": "x"}});
        let result = merge_pages("tools/list", &[p1]).unwrap();
        assert_eq!(result, json!({"tools": []}));
    }

    // Methods without an array key keep a single page as-is.
    #[test]
    fn merge_pages__unknown_method_single_returns_as_is() {
        let p1 = json!({"serverInfo": {"name": "test"}});
        let result = merge_pages("initialize", &[p1.clone()]);
        assert_eq!(result, Some(p1));
    }

    // Methods without an array key cannot merge several pages.
    #[test]
    fn merge_pages__unknown_method_multi_returns_none() {
        let p1 = json!({"serverInfo": {"name": "v1"}});
        let p2 = json!({"serverInfo": {"name": "v2"}});
        let result = merge_pages("initialize", &[p1, p2]);
        assert_eq!(result, None);
    }

    // ── diff_schema ──────────────────────────────────────────────────

    // A name present only in the new payload is reported as added.
    #[test]
    fn diff_schema__tool_added() {
        let old = json!({"tools": [{"name": "a", "description": "tool a"}]});
        let new = json!({"tools": [
            {"name": "a", "description": "tool a"},
            {"name": "b", "description": "tool b"}
        ]});
        let diffs = diff_schema("tools/list", &old, &new);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].change_type, "tool_added");
        assert_eq!(diffs[0].item_name.as_deref(), Some("b"));
    }

    // A name present only in the old payload is reported as removed.
    #[test]
    fn diff_schema__tool_removed() {
        let old = json!({"tools": [
            {"name": "a", "description": "tool a"},
            {"name": "b", "description": "tool b"}
        ]});
        let new = json!({"tools": [{"name": "a", "description": "tool a"}]});
        let diffs = diff_schema("tools/list", &old, &new);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].change_type, "tool_removed");
        assert_eq!(diffs[0].item_name.as_deref(), Some("b"));
    }

    // Same name with different content is reported as modified.
    #[test]
    fn diff_schema__tool_modified() {
        let old = json!({"tools": [{"name": "a", "description": "old desc"}]});
        let new = json!({"tools": [{"name": "a", "description": "new desc"}]});
        let diffs = diff_schema("tools/list", &old, &new);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].change_type, "tool_modified");
        assert_eq!(diffs[0].item_name.as_deref(), Some("a"));
    }

    // Identical payloads still yield one bulk "updated" record:
    // diff_schema never returns an empty list.
    #[test]
    fn diff_schema__no_change() {
        let payload = json!({"tools": [{"name": "a", "description": "tool a"}]});
        let diffs = diff_schema("tools/list", &payload, &payload);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].change_type, "updated");
        assert_eq!(diffs[0].item_name, None);
    }

    // One modification, one addition, and one removal in a single diff.
    // Assertions use `contains` rather than positions, so they do not
    // depend on the order of the returned records.
    #[test]
    fn diff_schema__multiple_changes() {
        let old = json!({"tools": [
            {"name": "a", "description": "old a"},
            {"name": "b", "description": "tool b"}
        ]});
        let new = json!({"tools": [
            {"name": "a", "description": "new a"},
            {"name": "c", "description": "tool c"}
        ]});
        let diffs = diff_schema("tools/list", &old, &new);
        let types: Vec<&str> = diffs.iter().map(|d| d.change_type.as_str()).collect();
        assert!(types.contains(&"tool_modified")); // a modified
        assert!(types.contains(&"tool_added")); // c added
        assert!(types.contains(&"tool_removed")); // b removed
        assert_eq!(diffs.len(), 3);
    }

    // Methods without named items get a single bulk "updated" record.
    #[test]
    fn diff_schema__initialize_returns_updated() {
        let old = json!({"serverInfo": {"name": "test", "version": "1.0"}});
        let new = json!({"serverInfo": {"name": "test", "version": "2.0"}});
        let diffs = diff_schema("initialize", &old, &new);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].change_type, "updated");
        assert_eq!(diffs[0].item_name, None);
    }

    // The change-type prefix follows the method: "prompt" for prompts/list.
    #[test]
    fn diff_schema__prompts() {
        let old = json!({"prompts": [{"name": "summarize"}]});
        let new = json!({"prompts": [{"name": "summarize"}, {"name": "translate"}]});
        let diffs = diff_schema("prompts/list", &old, &new);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].change_type, "prompt_added");
        assert_eq!(diffs[0].item_name.as_deref(), Some("translate"));
    }

    // The change-type prefix follows the method: "resource" for
    // resources/list.
    #[test]
    fn diff_schema__resources() {
        let old = json!({"resources": [
            {"name": "file1", "uri": "file://a"},
            {"name": "file2", "uri": "file://b"}
        ]});
        let new = json!({"resources": [{"name": "file1", "uri": "file://a"}]});
        let diffs = diff_schema("resources/list", &old, &new);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].change_type, "resource_removed");
        assert_eq!(diffs[0].item_name.as_deref(), Some("file2"));
    }

    // ── method_array_key ─────────────────────────────────────────────

    // Known list methods map to their array key; everything else to None.
    #[test]
    fn method_array_key__mapping() {
        assert_eq!(method_array_key("tools/list"), Some("tools"));
        assert_eq!(method_array_key("resources/list"), Some("resources"));
        assert_eq!(
            method_array_key("resources/templates/list"),
            Some("resourceTemplates")
        );
        assert_eq!(method_array_key("prompts/list"), Some("prompts"));
        assert_eq!(method_array_key("initialize"), None);
        assert_eq!(method_array_key("tools/call"), None);
    }
}