Skip to main content

mcpr_core/protocol/
schema.rs

1//! MCP schema capture: types, pagination merging, and diff logic.
2//!
3//! This module understands the structure of MCP discovery responses
4//! (`initialize`, `tools/list`, `resources/list`, `prompts/list`,
5//! `resources/templates/list`) and provides:
6//!
7//! - **Pagination detection**: Determine if a response is a single page or
8//!   part of a paginated sequence (MCP cursor-based pagination).
9//! - **Page merging**: Combine paginated responses into a single snapshot.
10//! - **Schema diffing**: Compare two snapshots to detect added, removed,
11//!   and modified items (tools, resources, prompts).
12//!
13//! This is pure protocol logic — no HTTP, no storage, no hashing.
14//! The proxy and storage layers consume these functions.
15
16use std::collections::HashMap;
17
18use serde::Serialize;
19use serde_json::Value;
20
21use super::mcp::{ClientMethod, LifecycleMethod, PromptsMethod, ResourcesMethod, ToolsMethod};
22
23// ── Types ────────────────────────────────────────────────────────────
24
25/// Pagination state for an MCP list response.
26///
27/// Determined by checking `params.cursor` in the request and
28/// `result.nextCursor` in the response.
29#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
30#[serde(rename_all = "snake_case")]
31pub enum PageStatus {
32    /// Single-page response (no pagination). This is the common path.
33    Complete,
34    /// First page of a paginated response (no cursor in request, has nextCursor).
35    FirstPage,
36    /// Middle page (has cursor in request and nextCursor in response).
37    MiddlePage,
38    /// Last page (has cursor in request, no nextCursor in response).
39    LastPage,
40}
41
/// One change detected between two schema snapshots of the same MCP method.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SchemaDiff {
    /// Label describing the change, e.g. "tool_added", "tool_removed",
    /// "tool_modified", "resource_added", "prompt_modified", or the
    /// bulk label "updated".
    pub change_type: String,
    /// Name of the item the change refers to (e.g., "search_products").
    /// `None` for bulk changes such as "updated" or "initial" that are
    /// not tied to a single named item.
    pub item_name: Option<String>,
}
52
53// ── Public functions ─────────────────────────────────────────────────
54
55/// Check if an MCP method is a schema discovery method whose response
56/// should be captured.
57pub fn is_schema_method(method: &ClientMethod) -> bool {
58    matches!(
59        method,
60        ClientMethod::Lifecycle(LifecycleMethod::Initialize)
61            | ClientMethod::Tools(ToolsMethod::List)
62            | ClientMethod::Resources(ResourcesMethod::List)
63            | ClientMethod::Resources(ResourcesMethod::TemplatesList)
64            | ClientMethod::Prompts(PromptsMethod::List)
65    )
66}
67
68/// Determine pagination status from the request body and response body.
69///
70/// MCP pagination uses cursor-based paging:
71/// - Request `params.cursor` present → continuing from a previous page.
72/// - Response `result.nextCursor` present → more pages available.
73pub fn detect_page_status(request_body: &Value, response_body: &Value) -> PageStatus {
74    let req_has_cursor = request_body
75        .get("params")
76        .and_then(|p| p.get("cursor"))
77        .and_then(|c| c.as_str())
78        .is_some();
79
80    let resp_has_next_cursor = response_body
81        .get("result")
82        .and_then(|r| r.get("nextCursor"))
83        .and_then(|c| c.as_str())
84        .is_some();
85
86    match (req_has_cursor, resp_has_next_cursor) {
87        (false, false) => PageStatus::Complete,
88        (false, true) => PageStatus::FirstPage,
89        (true, true) => PageStatus::MiddlePage,
90        (true, false) => PageStatus::LastPage,
91    }
92}
93
94/// Merge paginated list responses into a single combined `result` payload.
95///
96/// Each page is the `result` field from a JSON-RPC response. This function
97/// merges the array field (tools, resources, resourceTemplates, prompts)
98/// across all pages into a single value.
99///
100/// Returns `None` if pages is empty or the method has no array key.
101pub fn merge_pages(method: &str, pages: &[Value]) -> Option<Value> {
102    if pages.is_empty() {
103        return None;
104    }
105
106    // List methods (tools/list, resources/list, …) must extract only the
107    // named array so per-request metadata (`_meta`, server-generated
108    // request ids, etc.) does not leak into the hash and produce
109    // phantom versions. Non-list methods (initialize) retain the raw
110    // page — they have no array to project.
111    let Some(array_key) = method_array_key(method) else {
112        return (pages.len() == 1).then(|| pages[0].clone());
113    };
114
115    let mut merged_array: Vec<Value> = Vec::new();
116    for page in pages {
117        if let Some(arr) = page.get(array_key).and_then(|a| a.as_array()) {
118            merged_array.extend(arr.iter().cloned());
119        }
120    }
121
122    Some(serde_json::json!({ array_key: merged_array }))
123}
124
125/// Diff two schema payloads for a list method.
126///
127/// Compares named items (by their `name` field) and returns granular
128/// changes: added, removed, and modified items.
129///
130/// For methods without named items (e.g., `initialize`), returns a
131/// single "updated" diff if the payloads differ.
132pub fn diff_schema(method: &str, old_payload: &Value, new_payload: &Value) -> Vec<SchemaDiff> {
133    let array_key = match method_array_key(method) {
134        Some(key) => key,
135        None => {
136            // Non-list method (e.g., initialize) — no granular diff.
137            return vec![SchemaDiff {
138                change_type: "updated".to_string(),
139                item_name: None,
140            }];
141        }
142    };
143
144    let item_type = method_item_type(method);
145    let old_items = extract_named_items(old_payload, array_key);
146    let new_items = extract_named_items(new_payload, array_key);
147
148    let mut changes = Vec::new();
149
150    // Find added and modified items.
151    for (name, new_val) in &new_items {
152        match old_items.get(name) {
153            None => changes.push(SchemaDiff {
154                change_type: format!("{item_type}_added"),
155                item_name: Some(name.clone()),
156            }),
157            Some(old_val) if old_val != new_val => changes.push(SchemaDiff {
158                change_type: format!("{item_type}_modified"),
159                item_name: Some(name.clone()),
160            }),
161            _ => {} // unchanged
162        }
163    }
164
165    // Find removed items.
166    for name in old_items.keys() {
167        if !new_items.contains_key(name) {
168            changes.push(SchemaDiff {
169                change_type: format!("{item_type}_removed"),
170                item_name: Some(name.clone()),
171            });
172        }
173    }
174
175    if changes.is_empty() {
176        // Hash changed but no named items differ — structural change.
177        changes.push(SchemaDiff {
178            change_type: "updated".to_string(),
179            item_name: None,
180        });
181    }
182
183    changes
184}
185
186// ── Internal helpers ─────────────────────────────────────────────────
187
/// Look up the name of the array field that holds the items in the
/// `result` payload of an MCP list method.
///
/// Returns `None` for any method that is not one of the four known list
/// methods.
fn method_array_key(method: &str) -> Option<&'static str> {
    const ARRAY_KEYS: &[(&str, &str)] = &[
        ("tools/list", "tools"),
        ("resources/list", "resources"),
        ("resources/templates/list", "resourceTemplates"),
        ("prompts/list", "prompts"),
    ];

    ARRAY_KEYS
        .iter()
        .find(|(list_method, _)| *list_method == method)
        .map(|&(_, key)| key)
}
198
/// Return the item-type label used as the prefix of change records for
/// an MCP list method (e.g., "tool" in "tool_added", "resource" in
/// "resource_removed").
///
/// Methods outside the known list methods fall back to the generic
/// label "item".
fn method_item_type(method: &str) -> &'static str {
    const LABELS: &[(&str, &str)] = &[
        ("tools/list", "tool"),
        ("resources/list", "resource"),
        ("resources/templates/list", "resource_template"),
        ("prompts/list", "prompt"),
    ];

    LABELS
        .iter()
        .find_map(|&(list_method, label)| (list_method == method).then_some(label))
        .unwrap_or("item")
}
210
211/// Extract named items from a list payload as a map of name → JSON string.
212///
213/// MCP list items (tools, resources, prompts) have a `name` field that
214/// serves as a stable identifier for diffing.
215fn extract_named_items(payload: &Value, array_key: &str) -> HashMap<String, String> {
216    let mut map = HashMap::new();
217    if let Some(arr) = payload.get(array_key).and_then(|a| a.as_array()) {
218        for item in arr {
219            if let Some(name) = item.get("name").and_then(|n| n.as_str()) {
220                map.insert(name.to_string(), item.to_string());
221            }
222        }
223    }
224    map
225}
226
227// ── Tests ────────────────────────────────────────────────────────────
228
#[cfg(test)]
#[allow(non_snake_case)]
mod tests {
    use super::*;
    use serde_json::json;

    // ── Helpers ──────────────────────────────────────────────────────

    /// Assert that `diffs` holds exactly one entry with the given change
    /// label and item name.
    fn assert_single_diff(diffs: &[SchemaDiff], change_type: &str, item_name: Option<&str>) {
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].change_type, change_type);
        assert_eq!(diffs[0].item_name.as_deref(), item_name);
    }

    /// A `tools/list` request body, optionally continuing from `cursor`.
    fn tools_request(cursor: Option<&str>) -> Value {
        match cursor {
            Some(c) => json!({"method": "tools/list", "params": {"cursor": c}}),
            None => json!({"method": "tools/list"}),
        }
    }

    /// A `tools/list` response body with no tools, optionally announcing
    /// a `nextCursor`.
    fn tools_response(next_cursor: Option<&str>) -> Value {
        match next_cursor {
            Some(c) => json!({"result": {"tools": [], "nextCursor": c}}),
            None => json!({"result": {"tools": []}}),
        }
    }

    // ── is_schema_method ─────────────────────────────────────────────

    #[test]
    fn is_schema_method__matches_discovery() {
        let discovery = [
            ClientMethod::Lifecycle(LifecycleMethod::Initialize),
            ClientMethod::Tools(ToolsMethod::List),
            ClientMethod::Resources(ResourcesMethod::List),
            ClientMethod::Resources(ResourcesMethod::TemplatesList),
            ClientMethod::Prompts(PromptsMethod::List),
        ];
        for method in &discovery {
            assert!(is_schema_method(method));
        }
    }

    #[test]
    fn is_schema_method__rejects_non_discovery() {
        // Notifications live in a separate enum; since is_schema_method
        // only takes ClientMethod (request-side), they cannot even be
        // passed here. That is the type-level guarantee.
        let other = [
            ClientMethod::Tools(ToolsMethod::Call),
            ClientMethod::Resources(ResourcesMethod::Read),
            ClientMethod::Prompts(PromptsMethod::Get),
            ClientMethod::Ping,
        ];
        for method in &other {
            assert!(!is_schema_method(method));
        }
    }

    // ── detect_page_status ───────────────────────────────────────────

    #[test]
    fn detect_page_status__complete() {
        let status = detect_page_status(&tools_request(None), &tools_response(None));
        assert_eq!(status, PageStatus::Complete);
    }

    #[test]
    fn detect_page_status__first_page() {
        let status = detect_page_status(&tools_request(None), &tools_response(Some("abc")));
        assert_eq!(status, PageStatus::FirstPage);
    }

    #[test]
    fn detect_page_status__middle_page() {
        let status = detect_page_status(&tools_request(Some("abc")), &tools_response(Some("def")));
        assert_eq!(status, PageStatus::MiddlePage);
    }

    #[test]
    fn detect_page_status__last_page() {
        let status = detect_page_status(&tools_request(Some("abc")), &tools_response(None));
        assert_eq!(status, PageStatus::LastPage);
    }

    // ── merge_pages ──────────────────────────────────────────────────

    #[test]
    fn merge_pages__single() {
        let only = json!({"tools": [{"name": "a"}]});
        let merged = merge_pages("tools/list", std::slice::from_ref(&only));
        assert_eq!(merged.as_ref(), Some(&only));
    }

    #[test]
    fn merge_pages__two_pages() {
        let pages = [
            json!({"tools": [{"name": "a"}]}),
            json!({"tools": [{"name": "b"}]}),
        ];
        let merged = merge_pages("tools/list", &pages).unwrap();
        let tools = merged["tools"].as_array().unwrap();
        assert_eq!(tools.len(), 2);
        assert_eq!(tools[0]["name"], "a");
        assert_eq!(tools[1]["name"], "b");
    }

    #[test]
    fn merge_pages__resources() {
        let pages = [
            json!({"resources": [{"name": "r1", "uri": "file://a"}]}),
            json!({"resources": [{"name": "r2", "uri": "file://b"}]}),
        ];
        let merged = merge_pages("resources/list", &pages).unwrap();
        assert_eq!(merged["resources"].as_array().unwrap().len(), 2);
    }

    #[test]
    fn merge_pages__empty() {
        assert_eq!(merge_pages("tools/list", &[]), None);
    }

    #[test]
    fn merge_pages__single_strips_volatile_metadata() {
        // Regression: Study Kit upstream returned 38 tools but produced 138
        // schema versions because the single-page branch kept the whole
        // raw result, including `_meta` / `serverInfo` fields that the
        // server regenerates per request.
        let first = json!({
            "tools": [{"name": "a"}],
            "_meta": {"requestId": "req-1"},
            "serverInfo": {"generatedAt": "2026-04-19T00:00:00Z"}
        });
        let second = json!({
            "tools": [{"name": "a"}],
            "_meta": {"requestId": "req-2"},
            "serverInfo": {"generatedAt": "2026-04-19T00:00:05Z"}
        });
        let merged_first = merge_pages("tools/list", &[first]).unwrap();
        let merged_second = merge_pages("tools/list", &[second]).unwrap();
        assert_eq!(
            merged_first, merged_second,
            "per-request metadata must not reach the hash"
        );
        assert_eq!(merged_first, json!({"tools": [{"name": "a"}]}));
    }

    #[test]
    fn merge_pages__single_missing_array_key_yields_empty_array() {
        let page = json!({"_meta": {"requestId": "x"}});
        let merged = merge_pages("tools/list", &[page]).unwrap();
        assert_eq!(merged, json!({"tools": []}));
    }

    #[test]
    fn merge_pages__unknown_method_single_returns_as_is() {
        let page = json!({"serverInfo": {"name": "test"}});
        let merged = merge_pages("initialize", std::slice::from_ref(&page));
        assert_eq!(merged.as_ref(), Some(&page));
    }

    #[test]
    fn merge_pages__unknown_method_multi_returns_none() {
        let pages = [
            json!({"serverInfo": {"name": "v1"}}),
            json!({"serverInfo": {"name": "v2"}}),
        ];
        assert_eq!(merge_pages("initialize", &pages), None);
    }

    // ── diff_schema ──────────────────────────────────────────────────

    #[test]
    fn diff_schema__tool_added() {
        let before = json!({"tools": [{"name": "a", "description": "tool a"}]});
        let after = json!({"tools": [
            {"name": "a", "description": "tool a"},
            {"name": "b", "description": "tool b"}
        ]});
        let diffs = diff_schema("tools/list", &before, &after);
        assert_single_diff(&diffs, "tool_added", Some("b"));
    }

    #[test]
    fn diff_schema__tool_removed() {
        let before = json!({"tools": [
            {"name": "a", "description": "tool a"},
            {"name": "b", "description": "tool b"}
        ]});
        let after = json!({"tools": [{"name": "a", "description": "tool a"}]});
        let diffs = diff_schema("tools/list", &before, &after);
        assert_single_diff(&diffs, "tool_removed", Some("b"));
    }

    #[test]
    fn diff_schema__tool_modified() {
        let before = json!({"tools": [{"name": "a", "description": "old desc"}]});
        let after = json!({"tools": [{"name": "a", "description": "new desc"}]});
        let diffs = diff_schema("tools/list", &before, &after);
        assert_single_diff(&diffs, "tool_modified", Some("a"));
    }

    #[test]
    fn diff_schema__no_change() {
        let same = json!({"tools": [{"name": "a", "description": "tool a"}]});
        let diffs = diff_schema("tools/list", &same, &same);
        assert_single_diff(&diffs, "updated", None);
    }

    #[test]
    fn diff_schema__multiple_changes() {
        let before = json!({"tools": [
            {"name": "a", "description": "old a"},
            {"name": "b", "description": "tool b"}
        ]});
        let after = json!({"tools": [
            {"name": "a", "description": "new a"},
            {"name": "c", "description": "tool c"}
        ]});
        let diffs = diff_schema("tools/list", &before, &after);
        // a modified, c added, b removed — in any order.
        for expected in ["tool_modified", "tool_added", "tool_removed"] {
            assert!(diffs.iter().any(|d| d.change_type == expected));
        }
        assert_eq!(diffs.len(), 3);
    }

    #[test]
    fn diff_schema__initialize_returns_updated() {
        let before = json!({"serverInfo": {"name": "test", "version": "1.0"}});
        let after = json!({"serverInfo": {"name": "test", "version": "2.0"}});
        let diffs = diff_schema("initialize", &before, &after);
        assert_single_diff(&diffs, "updated", None);
    }

    #[test]
    fn diff_schema__prompts() {
        let before = json!({"prompts": [{"name": "summarize"}]});
        let after = json!({"prompts": [{"name": "summarize"}, {"name": "translate"}]});
        let diffs = diff_schema("prompts/list", &before, &after);
        assert_single_diff(&diffs, "prompt_added", Some("translate"));
    }

    #[test]
    fn diff_schema__resources() {
        let before = json!({"resources": [
            {"name": "file1", "uri": "file://a"},
            {"name": "file2", "uri": "file://b"}
        ]});
        let after = json!({"resources": [{"name": "file1", "uri": "file://a"}]});
        let diffs = diff_schema("resources/list", &before, &after);
        assert_single_diff(&diffs, "resource_removed", Some("file2"));
    }

    // ── method_array_key ─────────────────────────────────────────────

    #[test]
    fn method_array_key__mapping() {
        let expectations = [
            ("tools/list", Some("tools")),
            ("resources/list", Some("resources")),
            ("resources/templates/list", Some("resourceTemplates")),
            ("prompts/list", Some("prompts")),
            ("initialize", None),
            ("tools/call", None),
        ];
        for (method, expected) in expectations {
            assert_eq!(method_array_key(method), expected);
        }
    }
}