oxify_mcp/servers/
web.rs

1//! Web MCP server - provides HTTP and web scraping operations
2
3use crate::{McpServer, Result};
4use async_trait::async_trait;
5use serde_json::{json, Value};
6
7/// Built-in MCP server for web operations
8pub struct WebServer {
9    client: reqwest::Client,
10    /// Maximum response size in bytes (default: 10MB)
11    max_response_size: usize,
12}
13
14impl WebServer {
15    /// Create a new web server
16    pub fn new() -> Self {
17        Self {
18            client: reqwest::Client::builder()
19                .user_agent("OxiFY-MCP/0.1.0")
20                .timeout(std::time::Duration::from_secs(30))
21                .build()
22                .unwrap(),
23            max_response_size: 10 * 1024 * 1024, // 10MB
24        }
25    }
26
27    /// Set maximum response size
28    pub fn with_max_response_size(mut self, size: usize) -> Self {
29        self.max_response_size = size;
30        self
31    }
32}
33
34impl Default for WebServer {
35    fn default() -> Self {
36        Self::new()
37    }
38}
39
40#[async_trait]
41impl McpServer for WebServer {
42    async fn call_tool(&self, name: &str, arguments: Value) -> Result<Value> {
43        match name {
44            "http_get" => {
45                let url = arguments["url"]
46                    .as_str()
47                    .ok_or_else(|| crate::McpError::InvalidRequest("Missing 'url'".to_string()))?;
48
49                let response = self
50                    .client
51                    .get(url)
52                    .send()
53                    .await
54                    .map_err(|e| crate::McpError::ToolExecutionError(e.to_string()))?;
55
56                let status = response.status().as_u16();
57                let headers: Vec<(String, String)> = response
58                    .headers()
59                    .iter()
60                    .map(|(k, v)| (k.to_string(), v.to_str().unwrap_or("").to_string()))
61                    .collect();
62
63                let body = response
64                    .text()
65                    .await
66                    .map_err(|e| crate::McpError::ToolExecutionError(e.to_string()))?;
67
68                // Truncate if too large
69                let body = if body.len() > self.max_response_size {
70                    format!("{}...[truncated]", &body[..self.max_response_size])
71                } else {
72                    body
73                };
74
75                Ok(json!({
76                    "status": status,
77                    "headers": headers,
78                    "body": body,
79                }))
80            }
81
82            "http_post" => {
83                let url = arguments["url"]
84                    .as_str()
85                    .ok_or_else(|| crate::McpError::InvalidRequest("Missing 'url'".to_string()))?;
86                let body = arguments["body"].as_str().unwrap_or("");
87                let content_type = arguments["content_type"]
88                    .as_str()
89                    .unwrap_or("application/json");
90
91                let response = self
92                    .client
93                    .post(url)
94                    .header("Content-Type", content_type)
95                    .body(body.to_string())
96                    .send()
97                    .await
98                    .map_err(|e| crate::McpError::ToolExecutionError(e.to_string()))?;
99
100                let status = response.status().as_u16();
101                let response_body = response
102                    .text()
103                    .await
104                    .map_err(|e| crate::McpError::ToolExecutionError(e.to_string()))?;
105
106                Ok(json!({
107                    "status": status,
108                    "body": response_body,
109                }))
110            }
111
112            "web_scrape" => {
113                let url = arguments["url"]
114                    .as_str()
115                    .ok_or_else(|| crate::McpError::InvalidRequest("Missing 'url'".to_string()))?;
116                let selector = arguments.get("selector").and_then(|v| v.as_str());
117
118                let response = self
119                    .client
120                    .get(url)
121                    .send()
122                    .await
123                    .map_err(|e| crate::McpError::ToolExecutionError(e.to_string()))?;
124
125                let html = response
126                    .text()
127                    .await
128                    .map_err(|e| crate::McpError::ToolExecutionError(e.to_string()))?;
129
130                // Basic HTML to text conversion (simple implementation)
131                // In production, use a proper HTML parser like scraper or html2text
132                let text = if let Some(_css_selector) = selector {
133                    // TODO: Implement CSS selector parsing with scraper crate
134                    html
135                } else {
136                    // Simple HTML tag removal
137                    html.replace("<script", "\n<script")
138                        .replace("<style", "\n<style")
139                        .lines()
140                        .filter(|line| !line.trim_start().starts_with("<script"))
141                        .filter(|line| !line.trim_start().starts_with("<style"))
142                        .collect::<Vec<_>>()
143                        .join("\n")
144                };
145
146                Ok(json!({
147                    "url": url,
148                    "text": text,
149                    "length": text.len(),
150                }))
151            }
152
153            "web_screenshot" => {
154                // TODO: Implement headless browser screenshot
155                // Requires puppeteer/playwright integration
156                Err(crate::McpError::ToolExecutionError(
157                    "Screenshot not yet implemented. Requires headless browser.".to_string(),
158                ))
159            }
160
161            _ => Err(crate::McpError::ToolNotFound(name.to_string())),
162        }
163    }
164
165    async fn list_tools(&self) -> Result<Vec<Value>> {
166        Ok(vec![
167            json!({
168                "name": "http_get",
169                "description": "Perform HTTP GET request",
170                "inputSchema": {
171                    "type": "object",
172                    "properties": {
173                        "url": {
174                            "type": "string",
175                            "description": "URL to fetch"
176                        }
177                    },
178                    "required": ["url"]
179                }
180            }),
181            json!({
182                "name": "http_post",
183                "description": "Perform HTTP POST request",
184                "inputSchema": {
185                    "type": "object",
186                    "properties": {
187                        "url": {
188                            "type": "string",
189                            "description": "URL to post to"
190                        },
191                        "body": {
192                            "type": "string",
193                            "description": "Request body"
194                        },
195                        "content_type": {
196                            "type": "string",
197                            "description": "Content-Type header",
198                            "default": "application/json"
199                        }
200                    },
201                    "required": ["url"]
202                }
203            }),
204            json!({
205                "name": "web_scrape",
206                "description": "Scrape web page content",
207                "inputSchema": {
208                    "type": "object",
209                    "properties": {
210                        "url": {
211                            "type": "string",
212                            "description": "URL to scrape"
213                        },
214                        "selector": {
215                            "type": "string",
216                            "description": "CSS selector (optional)"
217                        }
218                    },
219                    "required": ["url"]
220                }
221            }),
222            json!({
223                "name": "web_screenshot",
224                "description": "Take screenshot of web page (not yet implemented)",
225                "inputSchema": {
226                    "type": "object",
227                    "properties": {
228                        "url": {
229                            "type": "string",
230                            "description": "URL to screenshot"
231                        }
232                    },
233                    "required": ["url"]
234                }
235            }),
236        ])
237    }
238}
239
#[cfg(test)]
mod tests {
    //! Unit tests for `WebServer`.
    //!
    //! Tests that need outbound network access are marked `#[ignore]` so the
    //! default `cargo test` run stays offline; run them explicitly with
    //! `cargo test -- --ignored`.

    use super::*;
    use serde_json::json;

    /// A fresh server advertises exactly four tools.
    #[tokio::test]
    async fn test_web_server_creation() {
        let server = WebServer::new();
        let tools = server.list_tools().await.unwrap();
        assert_eq!(tools.len(), 4);
    }

    /// The builder method overrides the default size limit.
    #[tokio::test]
    async fn test_web_server_with_max_response_size() {
        let server = WebServer::new().with_max_response_size(1024);
        assert_eq!(server.max_response_size, 1024);
    }

    /// Every expected tool name is present in the listing.
    #[tokio::test]
    async fn test_web_list_tools() {
        let server = WebServer::new();
        let tools = server.list_tools().await.unwrap();

        for expected in ["http_get", "http_post", "web_scrape", "web_screenshot"] {
            assert!(
                tools.iter().any(|t| t["name"] == expected),
                "tool `{expected}` missing from list_tools()"
            );
        }
    }

    /// The screenshot tool is a stub and reports that it is unimplemented.
    #[tokio::test]
    async fn test_web_screenshot_not_implemented() {
        let server = WebServer::new();

        let result = server
            .call_tool(
                "web_screenshot",
                json!({
                    "url": "https://example.com"
                }),
            )
            .await;

        assert!(matches!(
            result,
            Err(crate::McpError::ToolExecutionError(_))
        ));
        let err = result.unwrap_err();
        assert!(err.to_string().contains("not yet implemented"));
    }

    /// Unknown tool names are rejected with `ToolNotFound` carrying the name.
    #[tokio::test]
    async fn test_web_invalid_tool() {
        let server = WebServer::new();

        let result = server.call_tool("nonexistent_tool", json!({})).await;

        assert!(matches!(
            &result,
            Err(crate::McpError::ToolNotFound(n)) if n == "nonexistent_tool"
        ));
    }

    /// `http_get` without a `url` is an invalid request that mentions `url`.
    #[tokio::test]
    async fn test_web_http_get_missing_url() {
        let server = WebServer::new();

        let result = server.call_tool("http_get", json!({})).await;

        assert!(matches!(result, Err(crate::McpError::InvalidRequest(_))));
        let err = result.unwrap_err();
        assert!(err.to_string().contains("url"));
    }

    /// `http_post` without a `url` is an invalid request.
    #[tokio::test]
    async fn test_web_http_post_missing_url() {
        let server = WebServer::new();

        let result = server
            .call_tool(
                "http_post",
                json!({
                    "body": "test"
                }),
            )
            .await;

        assert!(matches!(result, Err(crate::McpError::InvalidRequest(_))));
    }

    /// `web_scrape` without a `url` is an invalid request.
    #[tokio::test]
    async fn test_web_scrape_missing_url() {
        let server = WebServer::new();

        let result = server.call_tool("web_scrape", json!({})).await;

        assert!(matches!(result, Err(crate::McpError::InvalidRequest(_))));
    }

    // The following tests talk to real HTTP servers and are therefore
    // ignored by default.

    #[tokio::test]
    #[ignore = "requires network access to httpbin.org"]
    async fn test_http_get_real() {
        let server = WebServer::new();

        let result = server
            .call_tool(
                "http_get",
                json!({
                    "url": "https://httpbin.org/get"
                }),
            )
            .await
            .unwrap();

        assert_eq!(result["status"], 200);
        assert!(!result["body"].as_str().unwrap().is_empty());
    }

    #[tokio::test]
    #[ignore = "requires network access to httpbin.org"]
    async fn test_http_post_real() {
        let server = WebServer::new();

        let result = server
            .call_tool(
                "http_post",
                json!({
                    "url": "https://httpbin.org/post",
                    "body": "{\"test\": \"data\"}",
                    "content_type": "application/json"
                }),
            )
            .await
            .unwrap();

        assert_eq!(result["status"], 200);
    }

    #[tokio::test]
    #[ignore = "requires network access to example.com"]
    async fn test_web_scrape_real() {
        let server = WebServer::new();

        let result = server
            .call_tool(
                "web_scrape",
                json!({
                    "url": "https://example.com"
                }),
            )
            .await
            .unwrap();

        assert!(result["text"].as_str().unwrap().contains("Example Domain"));
    }
}