1use crate::browser::{BrowserController, CaptureFormat, CaptureOptions, PageCapture};
6use crate::error::Result;
7use crate::extraction::{ContentExtractor, LinkExtractor, MetadataExtractor};
8use crate::mcp::types::{McpToolDefinition, ToolCallResult, ToolContent};
9use serde_json::{json, Value};
10use std::collections::HashMap;
11use std::sync::Arc;
12use tokio::sync::RwLock;
13use tracing::{error, info, instrument};
14
15pub trait McpTool: Send + Sync {
17 fn name(&self) -> &str;
19 fn description(&self) -> &str;
21 fn input_schema(&self) -> Value;
23 fn definition(&self) -> McpToolDefinition {
25 McpToolDefinition {
26 name: self.name().to_string(),
27 description: self.description().to_string(),
28 input_schema: self.input_schema(),
29 }
30 }
31}
32
33pub struct ToolRegistry {
35 tools: HashMap<String, Box<dyn McpTool>>,
36 #[allow(dead_code)]
37 browser: Arc<RwLock<Option<BrowserController>>>,
38}
39
40impl ToolRegistry {
41 pub fn new() -> Self {
43 let mut registry = Self {
44 tools: HashMap::new(),
45 browser: Arc::new(RwLock::new(None)),
46 };
47
48 registry.register(Box::new(WebNavigateTool));
50 registry.register(Box::new(WebScreenshotTool));
51 registry.register(Box::new(WebPdfTool));
52 registry.register(Box::new(WebExtractContentTool));
53 registry.register(Box::new(WebExtractLinksTool));
54 registry.register(Box::new(WebExtractMetadataTool));
55 registry.register(Box::new(WebExecuteJsTool));
56 registry.register(Box::new(WebCaptureMhtmlTool));
57
58 registry
59 }
60
61 pub fn register(&mut self, tool: Box<dyn McpTool>) {
63 self.tools.insert(tool.name().to_string(), tool);
64 }
65
66 pub fn definitions(&self) -> Vec<McpToolDefinition> {
68 self.tools.values().map(|t| t.definition()).collect()
69 }
70
71 #[instrument(skip(self, args))]
73 pub async fn execute(&self, name: &str, args: Value) -> ToolCallResult {
74 info!("Executing tool: {}", name);
75
76 if !self.tools.contains_key(name) {
77 return ToolCallResult::error(format!("Tool not found: {}", name));
78 }
79
80 let browser = self.get_or_create_browser().await;
82 let browser = match browser {
83 Ok(b) => b,
84 Err(e) => return ToolCallResult::error(format!("Failed to create browser: {}", e)),
85 };
86
87 match name {
88 "web_navigate" => self.execute_navigate(&browser, args).await,
89 "web_screenshot" => self.execute_screenshot(&browser, args).await,
90 "web_pdf" => self.execute_pdf(&browser, args).await,
91 "web_extract_content" => self.execute_extract_content(&browser, args).await,
92 "web_extract_links" => self.execute_extract_links(&browser, args).await,
93 "web_extract_metadata" => self.execute_extract_metadata(&browser, args).await,
94 "web_execute_js" => self.execute_js(&browser, args).await,
95 "web_capture_mhtml" => self.execute_capture_mhtml(&browser, args).await,
96 _ => ToolCallResult::error(format!("Unknown tool: {}", name)),
97 }
98 }
99
100 async fn get_or_create_browser(&self) -> Result<BrowserController> {
102 BrowserController::new().await
105 }
106
107 async fn execute_navigate(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
108 let url = match args.get("url").and_then(|v| v.as_str()) {
109 Some(u) => u,
110 None => return ToolCallResult::error("Missing required parameter: url"),
111 };
112
113 match browser.navigate(url).await {
114 Ok(page) => {
115 let current_url = page.url().await;
116 ToolCallResult::text(format!("Successfully navigated to: {}", current_url))
117 }
118 Err(e) => {
119 error!("Navigation failed: {}", e);
120 ToolCallResult::error(format!("Navigation failed: {}", e))
121 }
122 }
123 }
124
125 async fn execute_screenshot(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
126 let url = match args.get("url").and_then(|v| v.as_str()) {
127 Some(u) => u,
128 None => return ToolCallResult::error("Missing required parameter: url"),
129 };
130
131 let full_page = args
132 .get("fullPage")
133 .and_then(|v| v.as_bool())
134 .unwrap_or(true);
135 let format_str = args.get("format").and_then(|v| v.as_str()).unwrap_or("png");
136
137 let format = match format_str {
138 "jpeg" | "jpg" => CaptureFormat::Jpeg,
139 "webp" => CaptureFormat::Webp,
140 _ => CaptureFormat::Png,
141 };
142
143 match browser.navigate(url).await {
144 Ok(page) => {
145 let options = CaptureOptions {
146 format,
147 full_page,
148 as_base64: true,
149 ..Default::default()
150 };
151
152 match PageCapture::capture(&page, &options).await {
153 Ok(result) => {
154 let base64 = result.base64.clone().unwrap_or_else(|| result.to_base64());
155 ToolCallResult::image(base64, result.mime_type())
156 }
157 Err(e) => ToolCallResult::error(format!("Screenshot failed: {}", e)),
158 }
159 }
160 Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
161 }
162 }
163
164 async fn execute_pdf(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
165 let url = match args.get("url").and_then(|v| v.as_str()) {
166 Some(u) => u,
167 None => return ToolCallResult::error("Missing required parameter: url"),
168 };
169
170 match browser.navigate(url).await {
171 Ok(page) => {
172 let options = CaptureOptions::pdf();
173
174 match PageCapture::capture(&page, &options).await {
175 Ok(result) => {
176 let base64 = result.to_base64();
177 ToolCallResult::multi(vec![
178 ToolContent::text(format!("PDF generated: {} bytes", result.size)),
179 ToolContent::Resource {
180 uri: format!("pdf://{}", url),
181 resource: crate::mcp::types::ResourceContent {
182 mime_type: "application/pdf".to_string(),
183 text: None,
184 blob: Some(base64),
185 },
186 },
187 ])
188 }
189 Err(e) => ToolCallResult::error(format!("PDF generation failed: {}", e)),
190 }
191 }
192 Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
193 }
194 }
195
196 async fn execute_extract_content(
197 &self,
198 browser: &BrowserController,
199 args: Value,
200 ) -> ToolCallResult {
201 let url = match args.get("url").and_then(|v| v.as_str()) {
202 Some(u) => u,
203 None => return ToolCallResult::error("Missing required parameter: url"),
204 };
205
206 let selector = args.get("selector").and_then(|v| v.as_str());
207 let format = args
208 .get("format")
209 .and_then(|v| v.as_str())
210 .unwrap_or("markdown");
211
212 match browser.navigate(url).await {
213 Ok(page) => {
214 let content = if let Some(sel) = selector {
215 ContentExtractor::extract_from_selector(&page, sel).await
216 } else {
217 ContentExtractor::extract_main_content(&page).await
218 };
219
220 match content {
221 Ok(c) => {
222 let output = match format {
223 "text" => c.text,
224 "html" => c.html,
225 _ => c.markdown.unwrap_or(c.text),
226 };
227 ToolCallResult::text(output)
228 }
229 Err(e) => ToolCallResult::error(format!("Content extraction failed: {}", e)),
230 }
231 }
232 Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
233 }
234 }
235
236 async fn execute_extract_links(
237 &self,
238 browser: &BrowserController,
239 args: Value,
240 ) -> ToolCallResult {
241 let url = match args.get("url").and_then(|v| v.as_str()) {
242 Some(u) => u,
243 None => return ToolCallResult::error("Missing required parameter: url"),
244 };
245
246 let link_type = args.get("type").and_then(|v| v.as_str());
247 let selector = args.get("selector").and_then(|v| v.as_str());
248
249 match browser.navigate(url).await {
250 Ok(page) => {
251 let links = if let Some(sel) = selector {
252 LinkExtractor::extract_from_selector(&page, sel).await
253 } else {
254 match link_type {
255 Some("internal") => LinkExtractor::extract_internal(&page).await,
256 Some("external") => LinkExtractor::extract_external(&page).await,
257 _ => LinkExtractor::extract_all(&page).await,
258 }
259 };
260
261 match links {
262 Ok(links) => {
263 let json = serde_json::to_string_pretty(&links)
264 .unwrap_or_else(|_| "[]".to_string());
265 ToolCallResult::text(json)
266 }
267 Err(e) => ToolCallResult::error(format!("Link extraction failed: {}", e)),
268 }
269 }
270 Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
271 }
272 }
273
274 async fn execute_extract_metadata(
275 &self,
276 browser: &BrowserController,
277 args: Value,
278 ) -> ToolCallResult {
279 let url = match args.get("url").and_then(|v| v.as_str()) {
280 Some(u) => u,
281 None => return ToolCallResult::error("Missing required parameter: url"),
282 };
283
284 match browser.navigate(url).await {
285 Ok(page) => match MetadataExtractor::extract(&page).await {
286 Ok(meta) => {
287 let json =
288 serde_json::to_string_pretty(&meta).unwrap_or_else(|_| "{}".to_string());
289 ToolCallResult::text(json)
290 }
291 Err(e) => ToolCallResult::error(format!("Metadata extraction failed: {}", e)),
292 },
293 Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
294 }
295 }
296
297 async fn execute_js(&self, browser: &BrowserController, args: Value) -> ToolCallResult {
298 let url = match args.get("url").and_then(|v| v.as_str()) {
299 Some(u) => u,
300 None => return ToolCallResult::error("Missing required parameter: url"),
301 };
302
303 let script = match args.get("script").and_then(|v| v.as_str()) {
304 Some(s) => s,
305 None => return ToolCallResult::error("Missing required parameter: script"),
306 };
307
308 match browser.navigate(url).await {
309 Ok(page) => match page.page.evaluate(script).await {
310 Ok(result) => {
311 let value: Value = result.into_value().unwrap_or(Value::Null);
312 let output =
313 serde_json::to_string_pretty(&value).unwrap_or_else(|_| "null".to_string());
314 ToolCallResult::text(output)
315 }
316 Err(e) => ToolCallResult::error(format!("JavaScript execution failed: {}", e)),
317 },
318 Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
319 }
320 }
321
322 async fn execute_capture_mhtml(
323 &self,
324 browser: &BrowserController,
325 args: Value,
326 ) -> ToolCallResult {
327 let url = match args.get("url").and_then(|v| v.as_str()) {
328 Some(u) => u,
329 None => return ToolCallResult::error("Missing required parameter: url"),
330 };
331
332 match browser.navigate(url).await {
333 Ok(page) => match PageCapture::mhtml(&page).await {
334 Ok(result) => {
335 let base64 = result.to_base64();
336 ToolCallResult::multi(vec![
337 ToolContent::text(format!("MHTML captured: {} bytes", result.size)),
338 ToolContent::Resource {
339 uri: format!("mhtml://{}", url),
340 resource: crate::mcp::types::ResourceContent {
341 mime_type: "multipart/related".to_string(),
342 text: None,
343 blob: Some(base64),
344 },
345 },
346 ])
347 }
348 Err(e) => ToolCallResult::error(format!("MHTML capture failed: {}", e)),
349 },
350 Err(e) => ToolCallResult::error(format!("Navigation failed: {}", e)),
351 }
352 }
353}
354
355impl Default for ToolRegistry {
356 fn default() -> Self {
357 Self::new()
358 }
359}
360
361struct WebNavigateTool;
367
368impl McpTool for WebNavigateTool {
369 fn name(&self) -> &str {
370 "web_navigate"
371 }
372
373 fn description(&self) -> &str {
374 "Navigate to a URL using a headless browser"
375 }
376
377 fn input_schema(&self) -> Value {
378 json!({
379 "type": "object",
380 "properties": {
381 "url": {
382 "type": "string",
383 "description": "The URL to navigate to"
384 },
385 "waitFor": {
386 "type": "string",
387 "description": "CSS selector to wait for before returning",
388 "optional": true
389 }
390 },
391 "required": ["url"]
392 })
393 }
394}
395
396struct WebScreenshotTool;
398
399impl McpTool for WebScreenshotTool {
400 fn name(&self) -> &str {
401 "web_screenshot"
402 }
403
404 fn description(&self) -> &str {
405 "Capture a screenshot of a web page"
406 }
407
408 fn input_schema(&self) -> Value {
409 json!({
410 "type": "object",
411 "properties": {
412 "url": {
413 "type": "string",
414 "description": "The URL to capture"
415 },
416 "fullPage": {
417 "type": "boolean",
418 "description": "Capture full page (default: true)",
419 "default": true
420 },
421 "format": {
422 "type": "string",
423 "enum": ["png", "jpeg", "webp"],
424 "description": "Image format (default: png)",
425 "default": "png"
426 },
427 "selector": {
428 "type": "string",
429 "description": "CSS selector to capture specific element"
430 }
431 },
432 "required": ["url"]
433 })
434 }
435}
436
437struct WebPdfTool;
439
440impl McpTool for WebPdfTool {
441 fn name(&self) -> &str {
442 "web_pdf"
443 }
444
445 fn description(&self) -> &str {
446 "Generate a PDF of a web page"
447 }
448
449 fn input_schema(&self) -> Value {
450 json!({
451 "type": "object",
452 "properties": {
453 "url": {
454 "type": "string",
455 "description": "The URL to convert to PDF"
456 },
457 "printBackground": {
458 "type": "boolean",
459 "description": "Print background graphics (default: true)",
460 "default": true
461 }
462 },
463 "required": ["url"]
464 })
465 }
466}
467
468struct WebExtractContentTool;
470
471impl McpTool for WebExtractContentTool {
472 fn name(&self) -> &str {
473 "web_extract_content"
474 }
475
476 fn description(&self) -> &str {
477 "Extract main content from a web page as text or markdown"
478 }
479
480 fn input_schema(&self) -> Value {
481 json!({
482 "type": "object",
483 "properties": {
484 "url": {
485 "type": "string",
486 "description": "The URL to extract content from"
487 },
488 "selector": {
489 "type": "string",
490 "description": "CSS selector to extract from (default: auto-detect main content)"
491 },
492 "format": {
493 "type": "string",
494 "enum": ["text", "markdown", "html"],
495 "description": "Output format (default: markdown)",
496 "default": "markdown"
497 }
498 },
499 "required": ["url"]
500 })
501 }
502}
503
504struct WebExtractLinksTool;
506
507impl McpTool for WebExtractLinksTool {
508 fn name(&self) -> &str {
509 "web_extract_links"
510 }
511
512 fn description(&self) -> &str {
513 "Extract all links from a web page with context"
514 }
515
516 fn input_schema(&self) -> Value {
517 json!({
518 "type": "object",
519 "properties": {
520 "url": {
521 "type": "string",
522 "description": "The URL to extract links from"
523 },
524 "type": {
525 "type": "string",
526 "enum": ["all", "internal", "external"],
527 "description": "Type of links to extract (default: all)",
528 "default": "all"
529 },
530 "selector": {
531 "type": "string",
532 "description": "CSS selector to extract links from"
533 }
534 },
535 "required": ["url"]
536 })
537 }
538}
539
540struct WebExtractMetadataTool;
542
543impl McpTool for WebExtractMetadataTool {
544 fn name(&self) -> &str {
545 "web_extract_metadata"
546 }
547
548 fn description(&self) -> &str {
549 "Extract page metadata (title, description, Open Graph, Twitter Card, etc.)"
550 }
551
552 fn input_schema(&self) -> Value {
553 json!({
554 "type": "object",
555 "properties": {
556 "url": {
557 "type": "string",
558 "description": "The URL to extract metadata from"
559 }
560 },
561 "required": ["url"]
562 })
563 }
564}
565
566struct WebExecuteJsTool;
568
569impl McpTool for WebExecuteJsTool {
570 fn name(&self) -> &str {
571 "web_execute_js"
572 }
573
574 fn description(&self) -> &str {
575 "Execute JavaScript on a web page and return the result"
576 }
577
578 fn input_schema(&self) -> Value {
579 json!({
580 "type": "object",
581 "properties": {
582 "url": {
583 "type": "string",
584 "description": "The URL to execute JavaScript on"
585 },
586 "script": {
587 "type": "string",
588 "description": "The JavaScript code to execute"
589 }
590 },
591 "required": ["url", "script"]
592 })
593 }
594}
595
596struct WebCaptureMhtmlTool;
598
599impl McpTool for WebCaptureMhtmlTool {
600 fn name(&self) -> &str {
601 "web_capture_mhtml"
602 }
603
604 fn description(&self) -> &str {
605 "Capture a complete web page as an MHTML archive"
606 }
607
608 fn input_schema(&self) -> Value {
609 json!({
610 "type": "object",
611 "properties": {
612 "url": {
613 "type": "string",
614 "description": "The URL to capture"
615 }
616 },
617 "required": ["url"]
618 })
619 }
620}
621
/// Names of the built-in tools.
///
/// Each entry matches the `name()` of one of the tool descriptors defined in
/// this file.
pub const AVAILABLE_TOOLS: &[&str] = &[
    // Navigation and capture.
    "web_navigate",
    "web_screenshot",
    "web_pdf",
    // Extraction.
    "web_extract_content",
    "web_extract_links",
    "web_extract_metadata",
    // Scripting and archiving.
    "web_execute_js",
    "web_capture_mhtml",
];
633
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh registry holds at least the eight built-in tools.
    #[test]
    fn test_tool_registry_new() {
        let tool_count = ToolRegistry::new().tools.len();
        assert!(tool_count >= 8);
    }

    /// The definitions list is non-empty and includes `web_navigate`.
    #[test]
    fn test_tool_definitions() {
        let defs = ToolRegistry::new().definitions();
        assert!(!defs.is_empty());

        let has_navigate = defs.iter().any(|d| d.name == "web_navigate");
        assert!(has_navigate);
    }

    /// The navigate descriptor reports its name, description and a `url` property.
    #[test]
    fn test_web_navigate_tool() {
        let tool = WebNavigateTool;
        assert_eq!(tool.name(), "web_navigate");
        assert!(tool.description().contains("Navigate"));

        let url_property = &tool.input_schema()["properties"]["url"];
        assert!(url_property.is_object());
    }

    /// The public name list contains a sample of the built-in tools.
    #[test]
    fn test_available_tools() {
        for expected in ["web_navigate", "web_screenshot", "web_execute_js"].iter() {
            assert!(AVAILABLE_TOOLS.contains(expected));
        }
    }
}