Skip to main content

boost/tools/
browser_tools.rs

//! Browser MCP tools — Playwright-equivalent surface for AI agents.
//!
//! Every tool in this file accepts a `url` and drives a shared headless Chromium
//! instance (see `crate::browser`). Pages are torn down after each tool call to
//! avoid cross-call state leaks. The browser itself is reused.
6
7use async_trait::async_trait;
8use base64::engine::general_purpose::STANDARD as B64;
9use base64::Engine as _;
10use chromiumoxide::cdp::browser_protocol::page::CaptureScreenshotFormat;
11use chromiumoxide::handler::viewport::Viewport;
12use chromiumoxide::page::ScreenshotParams;
13use futures::StreamExt;
14use serde_json::{json, Value};
15
16use crate::browser::BrowserManager;
17use crate::protocol::CallToolResult;
18use crate::tool::{Context, Tool};
19
20fn url_schema(extra_props: Value) -> Value {
21    let mut props = serde_json::Map::new();
22    props.insert(
23        "url".into(),
24        json!({ "type": "string", "description": "Absolute or relative URL to open." }),
25    );
26    if let Value::Object(obj) = extra_props {
27        for (k, v) in obj {
28            props.insert(k, v);
29        }
30    }
31    json!({
32        "type": "object",
33        "required": ["url"],
34        "properties": props,
35    })
36}
37
38fn require_url(args: &Value) -> Result<String, CallToolResult> {
39    match args.get("url").and_then(|v| v.as_str()) {
40        Some(u) if !u.is_empty() => Ok(u.to_string()),
41        _ => Err(CallToolResult::error("`url` is required")),
42    }
43}
44
45// ─── browser-screenshot ────────────────────────────────────────────────────
46
47pub struct BrowserScreenshot {
48    pub manager: BrowserManager,
49}
50
51#[async_trait]
52impl Tool for BrowserScreenshot {
53    fn name(&self) -> &'static str {
54        "browser-screenshot"
55    }
56    fn description(&self) -> &'static str {
57        "Open a URL in a headless Chromium and return a PNG screenshot as base64. Optional `width`/`height` set the viewport; `full_page=true` captures the entire scrollable page."
58    }
59    fn input_schema(&self) -> Value {
60        url_schema(json!({
61            "width":     { "type": "integer", "default": 1280 },
62            "height":    { "type": "integer", "default": 800 },
63            "full_page": { "type": "boolean", "default": false }
64        }))
65    }
66
67    async fn call(&self, _ctx: &Context, args: Value) -> CallToolResult {
68        let url = match require_url(&args) {
69            Ok(u) => u,
70            Err(r) => return r,
71        };
72        let width = args.get("width").and_then(|v| v.as_u64()).unwrap_or(1280) as u32;
73        let height = args.get("height").and_then(|v| v.as_u64()).unwrap_or(800) as u32;
74        let full_page = args
75            .get("full_page")
76            .and_then(|v| v.as_bool())
77            .unwrap_or(false);
78
79        let page = match self.manager.open(&url).await {
80            Ok(p) => p,
81            Err(e) => return CallToolResult::error(e),
82        };
83        // Viewport is set via CDP. The builder in chromiumoxide 0.7 returns
84        // `Result<SetDeviceMetricsOverrideParams, _>`; unwrap-or-skip.
85        use chromiumoxide::cdp::browser_protocol::emulation::SetDeviceMetricsOverrideParams;
86        if let Ok(params) = SetDeviceMetricsOverrideParams::builder()
87            .width(width as i64)
88            .height(height as i64)
89            .device_scale_factor(1.0)
90            .mobile(false)
91            .build()
92        {
93            let _ = page.execute(params).await;
94        }
95        let _ = Viewport::default(); // suppress unused import
96
97        let mut params = ScreenshotParams::builder().format(CaptureScreenshotFormat::Png);
98        if full_page {
99            params = params.full_page(true);
100        }
101        let png = match page.screenshot(params.build()).await {
102            Ok(b) => b,
103            Err(e) => return CallToolResult::error(format!("screenshot: {e}")),
104        };
105        let _ = page.close().await;
106
107        let encoded = B64.encode(&png);
108        CallToolResult::json(&json!({
109            "url": url,
110            "bytes": png.len(),
111            "width": width,
112            "height": height,
113            "full_page": full_page,
114            "format": "png",
115            "base64": encoded,
116        }))
117    }
118}
119
120// ─── browser-console ────────────────────────────────────────────────────────
121
122pub struct BrowserConsole {
123    pub manager: BrowserManager,
124}
125
126#[async_trait]
127impl Tool for BrowserConsole {
128    fn name(&self) -> &'static str {
129        "browser-console"
130    }
131    fn description(&self) -> &'static str {
132        "Open a URL and collect console messages emitted by the page. Returns level + text for each entry. Useful for spotting JS errors after a Spark interaction."
133    }
134    fn input_schema(&self) -> Value {
135        url_schema(json!({
136            "wait_ms": { "type": "integer", "default": 500, "description": "How long to listen after load before reporting." }
137        }))
138    }
139
140    async fn call(&self, _ctx: &Context, args: Value) -> CallToolResult {
141        let url = match require_url(&args) {
142            Ok(u) => u,
143            Err(r) => return r,
144        };
145        let wait_ms = args.get("wait_ms").and_then(|v| v.as_u64()).unwrap_or(500);
146
147        use chromiumoxide::cdp::browser_protocol::log::EventEntryAdded;
148        use chromiumoxide::cdp::js_protocol::runtime::EventConsoleApiCalled;
149
150        let page = match self.manager.open(&url).await {
151            Ok(p) => p,
152            Err(e) => return CallToolResult::error(e),
153        };
154
155        let mut console_events = match page.event_listener::<EventConsoleApiCalled>().await {
156            Ok(s) => s,
157            Err(e) => return CallToolResult::error(format!("event listener: {e}")),
158        };
159        let mut log_events = match page.event_listener::<EventEntryAdded>().await {
160            Ok(s) => s,
161            Err(e) => return CallToolResult::error(format!("event listener: {e}")),
162        };
163
164        let mut messages = Vec::<Value>::new();
165        let deadline = std::time::Instant::now() + std::time::Duration::from_millis(wait_ms);
166        loop {
167            let timeout = deadline
168                .checked_duration_since(std::time::Instant::now())
169                .unwrap_or_default();
170            if timeout.is_zero() {
171                break;
172            }
173            tokio::select! {
174                _ = tokio::time::sleep(timeout) => break,
175                evt = console_events.next() => {
176                    if let Some(evt) = evt {
177                        let text = evt.args.iter().filter_map(|a| a.value.as_ref().map(|v| v.to_string())).collect::<Vec<_>>().join(" ");
178                        messages.push(json!({
179                            "kind": "console",
180                            "level": format!("{:?}", evt.r#type),
181                            "text": text,
182                        }));
183                    }
184                }
185                evt = log_events.next() => {
186                    if let Some(evt) = evt {
187                        messages.push(json!({
188                            "kind": "log",
189                            "level": format!("{:?}", evt.entry.level),
190                            "text": evt.entry.text,
191                            "source": format!("{:?}", evt.entry.source),
192                            "url": evt.entry.url,
193                        }));
194                    }
195                }
196            }
197        }
198
199        let _ = page.close().await;
200
201        CallToolResult::json(&json!({
202            "url": url,
203            "count": messages.len(),
204            "messages": messages,
205        }))
206    }
207}
208
209// ─── browser-network ────────────────────────────────────────────────────────
210
211pub struct BrowserNetwork {
212    pub manager: BrowserManager,
213}
214
215#[async_trait]
216impl Tool for BrowserNetwork {
217    fn name(&self) -> &'static str {
218        "browser-network"
219    }
220    fn description(&self) -> &'static str {
221        "Open a URL and return the network requests the page made. Each entry has method, URL, resource type, and status (when available)."
222    }
223    fn input_schema(&self) -> Value {
224        url_schema(json!({
225            "wait_ms": { "type": "integer", "default": 1000 }
226        }))
227    }
228
229    async fn call(&self, _ctx: &Context, args: Value) -> CallToolResult {
230        let url = match require_url(&args) {
231            Ok(u) => u,
232            Err(r) => return r,
233        };
234        let wait_ms = args.get("wait_ms").and_then(|v| v.as_u64()).unwrap_or(1000);
235
236        use chromiumoxide::cdp::browser_protocol::network::{
237            EventRequestWillBeSent, EventResponseReceived,
238        };
239
240        let page = match self.manager.open(&url).await {
241            Ok(p) => p,
242            Err(e) => return CallToolResult::error(e),
243        };
244
245        let mut req_stream = match page.event_listener::<EventRequestWillBeSent>().await {
246            Ok(s) => s,
247            Err(e) => return CallToolResult::error(format!("event listener: {e}")),
248        };
249        let mut resp_stream = match page.event_listener::<EventResponseReceived>().await {
250            Ok(s) => s,
251            Err(e) => return CallToolResult::error(format!("event listener: {e}")),
252        };
253
254        let mut by_id: indexmap::IndexMap<String, serde_json::Map<String, Value>> =
255            indexmap::IndexMap::new();
256        let deadline = std::time::Instant::now() + std::time::Duration::from_millis(wait_ms);
257        loop {
258            let timeout = deadline
259                .checked_duration_since(std::time::Instant::now())
260                .unwrap_or_default();
261            if timeout.is_zero() {
262                break;
263            }
264            tokio::select! {
265                _ = tokio::time::sleep(timeout) => break,
266                evt = req_stream.next() => {
267                    if let Some(evt) = evt {
268                        let mut m = serde_json::Map::new();
269                        m.insert("method".into(), json!(evt.request.method));
270                        m.insert("url".into(), json!(evt.request.url));
271                        m.insert("type".into(), json!(format!("{:?}", evt.r#type)));
272                        by_id.entry(format!("{:?}", evt.request_id)).or_default().extend(m);
273                    }
274                }
275                evt = resp_stream.next() => {
276                    if let Some(evt) = evt {
277                        let entry = by_id.entry(format!("{:?}", evt.request_id)).or_default();
278                        entry.insert("status".into(), json!(evt.response.status));
279                        entry.insert("status_text".into(), json!(evt.response.status_text));
280                        entry.insert("mime_type".into(), json!(evt.response.mime_type));
281                    }
282                }
283            }
284        }
285        let _ = page.close().await;
286
287        let entries: Vec<Value> = by_id.into_iter().map(|(_, v)| Value::Object(v)).collect();
288        CallToolResult::json(&json!({
289            "url": url,
290            "count": entries.len(),
291            "requests": entries,
292        }))
293    }
294}
295
296// ─── browser-click ──────────────────────────────────────────────────────────
297
298pub struct BrowserClick {
299    pub manager: BrowserManager,
300}
301
302#[async_trait]
303impl Tool for BrowserClick {
304    fn name(&self) -> &'static str {
305        "browser-click"
306    }
307    fn description(&self) -> &'static str {
308        "Open a URL and click the first element matching a CSS selector. Returns the URL after the click (which may have navigated)."
309    }
310    fn input_schema(&self) -> Value {
311        url_schema(json!({
312            "selector": { "type": "string", "description": "CSS selector for the element to click." },
313            "wait_ms":  { "type": "integer", "default": 500, "description": "How long to wait after click before reading the URL." }
314        }))
315    }
316
317    async fn call(&self, _ctx: &Context, args: Value) -> CallToolResult {
318        let url = match require_url(&args) {
319            Ok(u) => u,
320            Err(r) => return r,
321        };
322        let selector = match args.get("selector").and_then(|v| v.as_str()) {
323            Some(s) if !s.is_empty() => s.to_string(),
324            _ => return CallToolResult::error("`selector` is required"),
325        };
326        let wait_ms = args.get("wait_ms").and_then(|v| v.as_u64()).unwrap_or(500);
327
328        let page = match self.manager.open(&url).await {
329            Ok(p) => p,
330            Err(e) => return CallToolResult::error(e),
331        };
332        let element = match page.find_element(&selector).await {
333            Ok(el) => el,
334            Err(e) => {
335                let _ = page.close().await;
336                return CallToolResult::error(format!("find_element({selector}): {e}"));
337            }
338        };
339        if let Err(e) = element.click().await {
340            let _ = page.close().await;
341            return CallToolResult::error(format!("click({selector}): {e}"));
342        }
343        tokio::time::sleep(std::time::Duration::from_millis(wait_ms)).await;
344
345        let new_url = page.url().await.ok().flatten().unwrap_or_default();
346        let _ = page.close().await;
347
348        CallToolResult::json(&json!({
349            "url": url,
350            "selector": selector,
351            "current_url": new_url,
352        }))
353    }
354}
355
356// ─── browser-fill ───────────────────────────────────────────────────────────
357
358pub struct BrowserFill {
359    pub manager: BrowserManager,
360}
361
362#[async_trait]
363impl Tool for BrowserFill {
364    fn name(&self) -> &'static str {
365        "browser-fill"
366    }
367    fn description(&self) -> &'static str {
368        "Open a URL, locate a single input by CSS selector, replace its value, and optionally submit the enclosing form. Returns the final URL after submit."
369    }
370    fn input_schema(&self) -> Value {
371        url_schema(json!({
372            "selector": { "type": "string", "description": "CSS selector targeting the input/textarea." },
373            "value":    { "type": "string", "description": "New value to set." },
374            "submit":   { "type": "boolean", "default": false, "description": "If true, dispatch a form submit after filling." },
375            "wait_ms":  { "type": "integer", "default": 500 }
376        }))
377    }
378
379    async fn call(&self, _ctx: &Context, args: Value) -> CallToolResult {
380        let url = match require_url(&args) {
381            Ok(u) => u,
382            Err(r) => return r,
383        };
384        let selector = match args.get("selector").and_then(|v| v.as_str()) {
385            Some(s) if !s.is_empty() => s.to_string(),
386            _ => return CallToolResult::error("`selector` is required"),
387        };
388        let value = args
389            .get("value")
390            .and_then(|v| v.as_str())
391            .unwrap_or("")
392            .to_string();
393        let submit = args
394            .get("submit")
395            .and_then(|v| v.as_bool())
396            .unwrap_or(false);
397        let wait_ms = args.get("wait_ms").and_then(|v| v.as_u64()).unwrap_or(500);
398
399        let page = match self.manager.open(&url).await {
400            Ok(p) => p,
401            Err(e) => return CallToolResult::error(e),
402        };
403
404        // Use JS evaluate to set value + fire the right events (works for
405        // controlled inputs in React/Vue/Spark alike).
406        let escaped = serde_json::to_string(&value).unwrap_or_else(|_| "\"\"".to_string());
407        let selector_lit = serde_json::to_string(&selector).unwrap_or_default();
408        let script = format!(
409            "(function() {{ const el = document.querySelector({selector_lit}); if (!el) return 'not_found'; el.value = {escaped}; el.dispatchEvent(new Event('input', {{ bubbles: true }})); el.dispatchEvent(new Event('change', {{ bubbles: true }})); {} return 'ok'; }})()",
410            if submit {
411                "if (el.form) el.form.requestSubmit ? el.form.requestSubmit() : el.form.submit();"
412            } else {
413                ""
414            }
415        );
416        let result = page.evaluate(script).await;
417        tokio::time::sleep(std::time::Duration::from_millis(wait_ms)).await;
418        let final_url = page.url().await.ok().flatten().unwrap_or_default();
419        let _ = page.close().await;
420
421        match result {
422            Ok(v) => {
423                let outcome = v.into_value().unwrap_or(Value::Null);
424                if outcome.as_str() == Some("not_found") {
425                    return CallToolResult::error(format!("selector `{selector}` not found"));
426                }
427                CallToolResult::json(&json!({
428                    "url": url,
429                    "selector": selector,
430                    "submitted": submit,
431                    "current_url": final_url,
432                }))
433            }
434            Err(e) => CallToolResult::error(format!("fill: {e}")),
435        }
436    }
437}
438
439// ─── browser-type ───────────────────────────────────────────────────────────
440
441pub struct BrowserType {
442    pub manager: BrowserManager,
443}
444
445#[async_trait]
446impl Tool for BrowserType {
447    fn name(&self) -> &'static str {
448        "browser-type"
449    }
450    fn description(&self) -> &'static str {
451        "Open a URL, focus an element by selector, and type a string (character-by-character keypresses, useful for triggering keydown handlers)."
452    }
453    fn input_schema(&self) -> Value {
454        url_schema(json!({
455            "selector": { "type": "string", "description": "CSS selector for the element to focus." },
456            "text":     { "type": "string", "description": "Text to type, one keypress per character." }
457        }))
458    }
459
460    async fn call(&self, _ctx: &Context, args: Value) -> CallToolResult {
461        let url = match require_url(&args) {
462            Ok(u) => u,
463            Err(r) => return r,
464        };
465        let selector = match args.get("selector").and_then(|v| v.as_str()) {
466            Some(s) if !s.is_empty() => s.to_string(),
467            _ => return CallToolResult::error("`selector` is required"),
468        };
469        let text = args
470            .get("text")
471            .and_then(|v| v.as_str())
472            .unwrap_or("")
473            .to_string();
474
475        let page = match self.manager.open(&url).await {
476            Ok(p) => p,
477            Err(e) => return CallToolResult::error(e),
478        };
479        let element = match page.find_element(&selector).await {
480            Ok(el) => el,
481            Err(e) => {
482                let _ = page.close().await;
483                return CallToolResult::error(format!("find_element({selector}): {e}"));
484            }
485        };
486        if let Err(e) = element.focus().await {
487            let _ = page.close().await;
488            return CallToolResult::error(format!("focus: {e}"));
489        }
490        if let Err(e) = element.type_str(&text).await {
491            let _ = page.close().await;
492            return CallToolResult::error(format!("type: {e}"));
493        }
494        let _ = page.close().await;
495
496        CallToolResult::json(&json!({
497            "url": url,
498            "selector": selector,
499            "chars_typed": text.chars().count(),
500        }))
501    }
502}
503
504// ─── browser-wait-for ───────────────────────────────────────────────────────
505
506pub struct BrowserWaitFor {
507    pub manager: BrowserManager,
508}
509
510#[async_trait]
511impl Tool for BrowserWaitFor {
512    fn name(&self) -> &'static str {
513        "browser-wait-for"
514    }
515    fn description(&self) -> &'static str {
516        "Open a URL and wait for an element matching a CSS selector to appear in the DOM (with a timeout). Useful for tests that need to wait out async data loads or Spark interactions."
517    }
518    fn input_schema(&self) -> Value {
519        url_schema(json!({
520            "selector":   { "type": "string", "description": "CSS selector to wait for." },
521            "timeout_ms": { "type": "integer", "default": 5000, "description": "Max time to wait, in ms." },
522            "poll_ms":    { "type": "integer", "default": 100,  "description": "Polling interval, in ms." }
523        }))
524    }
525
526    async fn call(&self, _ctx: &Context, args: Value) -> CallToolResult {
527        let url = match require_url(&args) {
528            Ok(u) => u,
529            Err(r) => return r,
530        };
531        let selector = match args.get("selector").and_then(|v| v.as_str()) {
532            Some(s) if !s.is_empty() => s.to_string(),
533            _ => return CallToolResult::error("`selector` is required"),
534        };
535        let timeout_ms = args
536            .get("timeout_ms")
537            .and_then(|v| v.as_u64())
538            .unwrap_or(5000);
539        let poll_ms = args.get("poll_ms").and_then(|v| v.as_u64()).unwrap_or(100);
540
541        let page = match self.manager.open(&url).await {
542            Ok(p) => p,
543            Err(e) => return CallToolResult::error(e),
544        };
545
546        let deadline = std::time::Instant::now() + std::time::Duration::from_millis(timeout_ms);
547        let mut found = false;
548        let mut elapsed_ms: u128 = 0;
549        while std::time::Instant::now() < deadline {
550            if page.find_element(&selector).await.is_ok() {
551                found = true;
552                break;
553            }
554            tokio::time::sleep(std::time::Duration::from_millis(poll_ms)).await;
555            elapsed_ms += poll_ms as u128;
556        }
557        let _ = page.close().await;
558
559        CallToolResult::json(&json!({
560            "url": url,
561            "selector": selector,
562            "found": found,
563            "elapsed_ms": elapsed_ms,
564        }))
565    }
566}
567
568// ─── browser-eval ───────────────────────────────────────────────────────────
569
570pub struct BrowserEval {
571    pub manager: BrowserManager,
572}
573
574#[async_trait]
575impl Tool for BrowserEval {
576    fn name(&self) -> &'static str {
577        "browser-eval"
578    }
579    fn description(&self) -> &'static str {
580        "Open a URL and evaluate a JavaScript expression in the page context. Returns the result as JSON (numbers, strings, booleans, null, or objects/arrays via JSON.stringify)."
581    }
582    fn input_schema(&self) -> Value {
583        url_schema(json!({
584            "script": { "type": "string", "description": "JS expression or statement(s). The final expression's value is returned." }
585        }))
586    }
587
588    async fn call(&self, _ctx: &Context, args: Value) -> CallToolResult {
589        let url = match require_url(&args) {
590            Ok(u) => u,
591            Err(r) => return r,
592        };
593        let script = match args.get("script").and_then(|v| v.as_str()) {
594            Some(s) if !s.is_empty() => s.to_string(),
595            _ => return CallToolResult::error("`script` is required"),
596        };
597
598        let page = match self.manager.open(&url).await {
599            Ok(p) => p,
600            Err(e) => return CallToolResult::error(e),
601        };
602
603        // Wrap in IIFE so we get the final expression value, and stringify any
604        // object/array so it round-trips through JSON cleanly.
605        let wrapped = format!(
606            "(function() {{ try {{ const __r = (function(){{ {script} }})(); return typeof __r === 'object' ? JSON.stringify(__r) : __r; }} catch (e) {{ return 'ERROR: ' + e.message; }} }})()"
607        );
608
609        let result = page.evaluate(wrapped).await;
610        let _ = page.close().await;
611
612        match result {
613            Ok(v) => {
614                let raw = v.into_value().unwrap_or(serde_json::Value::Null);
615                CallToolResult::json(&json!({
616                    "url": url,
617                    "value": raw,
618                }))
619            }
620            Err(e) => CallToolResult::error(format!("eval: {e}")),
621        }
622    }
623}