Skip to main content

crw_core/
mcp.rs

1//! Shared MCP (Model Context Protocol) JSON-RPC types and tool definitions.
2//!
3//! Used by both the HTTP MCP endpoint (`crw-server`) and the stdio MCP proxy (`crw-mcp`).
4
5use serde::{Deserialize, Serialize};
6use serde_json::{Value, json};
7
8pub const PROTOCOL_VERSION: &str = "2024-11-05";
9
10// --- JSON-RPC types ---
11
12#[derive(Deserialize)]
13pub struct JsonRpcRequest {
14    pub jsonrpc: String,
15    pub id: Option<Value>,
16    pub method: String,
17    #[serde(default)]
18    pub params: Value,
19}
20
21#[derive(Serialize)]
22pub struct JsonRpcResponse {
23    pub jsonrpc: String,
24    pub id: Value,
25    #[serde(skip_serializing_if = "Option::is_none")]
26    pub result: Option<Value>,
27    #[serde(skip_serializing_if = "Option::is_none")]
28    pub error: Option<JsonRpcError>,
29}
30
31#[derive(Serialize)]
32pub struct JsonRpcError {
33    pub code: i64,
34    pub message: String,
35}
36
37impl JsonRpcResponse {
38    pub fn success(id: Value, result: Value) -> Self {
39        Self {
40            jsonrpc: "2.0".into(),
41            id,
42            result: Some(result),
43            error: None,
44        }
45    }
46
47    pub fn error(id: Value, code: i64, message: String) -> Self {
48        Self {
49            jsonrpc: "2.0".into(),
50            id,
51            result: None,
52            error: Some(JsonRpcError { code, message }),
53        }
54    }
55}
56
57// --- Tool definitions ---
58
59pub fn tool_definitions() -> Value {
60    json!({
61        "tools": [
62            {
63                "name": "crw_scrape",
64                "description": "Scrape a single URL and return its content as markdown, HTML, or links. Use this to extract content from any web page.",
65                "inputSchema": {
66                    "type": "object",
67                    "properties": {
68                        "url": {
69                            "type": "string",
70                            "description": "The URL to scrape"
71                        },
72                        "formats": {
73                            "type": "array",
74                            "items": { "type": "string", "enum": ["markdown", "html", "links"] },
75                            "description": "Output formats (default: [\"markdown\"])"
76                        },
77                        "onlyMainContent": {
78                            "type": "boolean",
79                            "description": "Extract only the main content, removing nav/footer/etc (default: true)"
80                        },
81                        "includeTags": {
82                            "type": "array",
83                            "items": { "type": "string" },
84                            "description": "CSS selectors to include (only content matching these selectors)"
85                        },
86                        "excludeTags": {
87                            "type": "array",
88                            "items": { "type": "string" },
89                            "description": "CSS selectors to exclude from output"
90                        }
91                    },
92                    "required": ["url"]
93                }
94            },
95            {
96                "name": "crw_crawl",
97                "description": "Start an async crawl of a website. Returns a job ID that can be polled with crw_check_crawl_status.",
98                "inputSchema": {
99                    "type": "object",
100                    "properties": {
101                        "url": {
102                            "type": "string",
103                            "description": "The starting URL to crawl"
104                        },
105                        "maxDepth": {
106                            "type": "integer",
107                            "description": "Maximum crawl depth (default: 2)"
108                        },
109                        "maxPages": {
110                            "type": "integer",
111                            "description": "Maximum number of pages to crawl (default: 10)"
112                        },
113                        "jsonSchema": {
114                            "type": "object",
115                            "description": "JSON schema for LLM-based structured data extraction on each crawled page"
116                        }
117                    },
118                    "required": ["url"]
119                }
120            },
121            {
122                "name": "crw_check_crawl_status",
123                "description": "Check the status of an async crawl job and retrieve results.",
124                "inputSchema": {
125                    "type": "object",
126                    "properties": {
127                        "id": {
128                            "type": "string",
129                            "description": "The crawl job ID returned by crw_crawl"
130                        }
131                    },
132                    "required": ["id"]
133                }
134            },
135            {
136                "name": "crw_map",
137                "description": "Discover URLs on a website by crawling and/or reading its sitemap.",
138                "inputSchema": {
139                    "type": "object",
140                    "properties": {
141                        "url": {
142                            "type": "string",
143                            "description": "The URL to map"
144                        },
145                        "maxDepth": {
146                            "type": "integer",
147                            "description": "Maximum crawl depth for discovery (default: 2)"
148                        },
149                        "useSitemap": {
150                            "type": "boolean",
151                            "description": "Whether to use the site's sitemap.xml (default: true)"
152                        }
153                    },
154                    "required": ["url"]
155                }
156            }
157        ]
158    })
159}
160
161/// Result of handling a protocol method.
162pub enum ProtocolResult {
163    /// Send this response back to the client.
164    Response(JsonRpcResponse),
165    /// Notification — no response needed.
166    Notification,
167    /// Not a protocol method — caller should handle it.
168    NotHandled,
169}
170
171/// Handle common MCP protocol methods (initialize, tools/list, ping, notifications).
172pub fn handle_protocol_method(
173    server_name: &str,
174    server_version: &str,
175    req: &JsonRpcRequest,
176) -> ProtocolResult {
177    if req.jsonrpc != "2.0" {
178        let id = req.id.clone().unwrap_or(Value::Null);
179        return ProtocolResult::Response(JsonRpcResponse::error(
180            id,
181            -32600,
182            "invalid jsonrpc version".into(),
183        ));
184    }
185
186    match req.method.as_str() {
187        "notifications/initialized" | "notifications/cancelled" => ProtocolResult::Notification,
188
189        "initialize" => {
190            let id = req.id.clone().unwrap_or(Value::Null);
191            ProtocolResult::Response(JsonRpcResponse::success(
192                id,
193                json!({
194                    "protocolVersion": PROTOCOL_VERSION,
195                    "capabilities": { "tools": {} },
196                    "serverInfo": {
197                        "name": server_name,
198                        "version": server_version
199                    }
200                }),
201            ))
202        }
203
204        "tools/list" => {
205            let id = req.id.clone().unwrap_or(Value::Null);
206            ProtocolResult::Response(JsonRpcResponse::success(id, tool_definitions()))
207        }
208
209        "ping" => {
210            let id = req.id.clone().unwrap_or(Value::Null);
211            ProtocolResult::Response(JsonRpcResponse::success(id, json!({})))
212        }
213
214        _ => ProtocolResult::NotHandled,
215    }
216}
217
218/// Wrap a tool call result into an MCP-compliant content response.
219pub fn tool_result_response(id: Value, result: Result<Value, String>) -> JsonRpcResponse {
220    match result {
221        Ok(value) => {
222            let text = serde_json::to_string_pretty(&value).unwrap_or_default();
223            JsonRpcResponse::success(
224                id,
225                json!({
226                    "content": [{"type": "text", "text": text}]
227                }),
228            )
229        }
230        Err(e) => JsonRpcResponse::success(
231            id,
232            json!({
233                "content": [{"type": "text", "text": e}],
234                "isError": true
235            }),
236        ),
237    }
238}