Skip to main content

browser_control/mcp/
tools.rs

1//! MCP tools exposed by the `browser-control mcp` server.
2//!
3//! Five tools wrap the underlying CDP / BiDi clients:
4//! `navigate`, `get_dom`, `screenshot`, `fetch`, `select_element`.
5
6use anyhow::{anyhow, Result};
7use serde_json::{json, Value};
8use std::sync::Arc;
9
10use crate::detect::Engine;
11use crate::dom::scripts::{FETCH_JS, GET_DOM_JS, SELECT_ELEMENT_JS};
12use crate::mcp::server::{RegisteredTool, ServerState, ToolHandler, ToolRegistry};
13
14/// Register the standard tool set onto the given registry.
15pub fn register_all(registry: &ToolRegistry) {
16    registry.register(make_navigate());
17    registry.register(make_get_dom());
18    registry.register(make_screenshot());
19    registry.register(make_fetch());
20    registry.register(make_select_element());
21}
22
23// ---------------------------------------------------------------------------
24// Engine dispatch helpers.
25// ---------------------------------------------------------------------------
26
27async fn open_cdp(endpoint: &str) -> Result<crate::cdp::CdpClient> {
28    if endpoint.starts_with("ws://") || endpoint.starts_with("wss://") {
29        crate::cdp::CdpClient::connect(endpoint).await
30    } else {
31        crate::cdp::CdpClient::connect_http(endpoint).await
32    }
33}
34
35async fn open_bidi(endpoint: &str) -> Result<crate::bidi::BidiClient> {
36    if endpoint.starts_with("ws://") || endpoint.starts_with("wss://") {
37        crate::bidi::BidiClient::connect(endpoint).await
38    } else {
39        let client = reqwest::Client::new();
40        let v: Value = client
41            .get(format!("{}/json/version", endpoint.trim_end_matches('/')))
42            .send()
43            .await?
44            .json()
45            .await?;
46        let ws = v
47            .get("webSocketDebuggerUrl")
48            .and_then(|v| v.as_str())
49            .ok_or_else(|| anyhow!("no webSocketDebuggerUrl"))?
50            .to_string();
51        crate::bidi::BidiClient::connect(&ws).await
52    }
53}
54
55/// Attach to the first `page` target via CDP and return `(client, session_id)`.
56async fn cdp_attach_first_page(endpoint: &str) -> Result<(crate::cdp::CdpClient, String)> {
57    let client = open_cdp(endpoint).await?;
58    let targets = client.list_targets().await?;
59    let target_id = targets
60        .iter()
61        .find(|t| t.get("type").and_then(|v| v.as_str()) == Some("page"))
62        .and_then(|t| t.get("targetId").and_then(|v| v.as_str()))
63        .ok_or_else(|| anyhow!("no page target found"))?
64        .to_string();
65    let session_id = client.attach_to_target(&target_id).await?;
66    Ok((client, session_id))
67}
68
69/// Open or return the cached BiDi session and top-level browsing context.
70///
71/// Firefox limits a browser instance to one active BiDi session at a time,
72/// so we open the WebSocket once and reuse it for every tool call.
73async fn bidi_top_context(
74    state: &ServerState,
75) -> Result<(std::sync::Arc<crate::bidi::BidiClient>, String)> {
76    let mut guard = state.bidi.lock().await;
77    if let Some((c, ctx)) = guard.as_ref() {
78        return Ok((c.clone(), ctx.clone()));
79    }
80    let client = open_bidi(&state.browser.endpoint).await?;
81    client.session_new().await?;
82    let tree = client.send("browsingContext.getTree", json!({})).await?;
83    let ctx = tree["contexts"][0]["context"]
84        .as_str()
85        .ok_or_else(|| anyhow!("no top-level browsing context"))?
86        .to_string();
87    let arc = std::sync::Arc::new(client);
88    *guard = Some((arc.clone(), ctx.clone()));
89    Ok((arc, ctx))
90}
91
92fn text_content(text: impl Into<String>) -> Value {
93    json!({ "content": [ { "type": "text", "text": text.into() } ] })
94}
95
96fn image_content(data: String) -> Value {
97    json!({
98        "content": [ { "type": "image", "data": data, "mimeType": "image/png" } ]
99    })
100}
101
102fn handler<F>(f: F) -> ToolHandler
103where
104    F: Fn(ServerState, Value) -> futures_util::future::BoxFuture<'static, Result<Value>>
105        + Send
106        + Sync
107        + 'static,
108{
109    Arc::new(f)
110}
111
112// ---------------------------------------------------------------------------
113// navigate
114// ---------------------------------------------------------------------------
115
116fn make_navigate() -> RegisteredTool {
117    RegisteredTool {
118        name: "navigate".into(),
119        description: "Navigate the active page to a URL.".into(),
120        input_schema: json!({
121            "type": "object",
122            "properties": { "url": { "type": "string" } },
123            "required": ["url"],
124        }),
125        handler: handler(|state, args| {
126            Box::pin(async move {
127                let url = args
128                    .get("url")
129                    .and_then(|v| v.as_str())
130                    .ok_or_else(|| anyhow!("missing 'url'"))?
131                    .to_string();
132                match state.browser.engine {
133                    Engine::Cdp => {
134                        let (client, session_id) =
135                            cdp_attach_first_page(&state.browser.endpoint).await?;
136                        client
137                            .send_with_session(
138                                "Page.navigate",
139                                json!({ "url": url }),
140                                Some(&session_id),
141                            )
142                            .await?;
143                        client.close().await;
144                    }
145                    Engine::Bidi => {
146                        let (client, ctx) = bidi_top_context(&state).await?;
147                        client.browsing_context_navigate(&ctx, &url).await?;
148                    }
149                }
150                Ok(text_content(format!("Navigated to {url}")))
151            })
152        }),
153    }
154}
155
156// ---------------------------------------------------------------------------
157// get_dom
158// ---------------------------------------------------------------------------
159
160fn make_get_dom() -> RegisteredTool {
161    RegisteredTool {
162        name: "get_dom".into(),
163        description: "Get the rendered DOM as HTML, with shadow roots serialized when supported."
164            .into(),
165        input_schema: json!({
166            "type": "object",
167            "properties": {
168                "selector": {
169                    "type": "string",
170                    "description": "Optional CSS selector; defaults to the document element."
171                }
172            },
173        }),
174        handler: handler(|state, args| {
175            Box::pin(async move {
176                let selector_arg = args.get("selector").and_then(|v| v.as_str());
177                let selector_literal = match selector_arg {
178                    Some(s) => serde_json::to_string(s)?,
179                    None => "null".to_string(),
180                };
181                let expr = format!("({GET_DOM_JS})({selector_literal})");
182                let html = match state.browser.engine {
183                    Engine::Cdp => {
184                        let (client, session_id) =
185                            cdp_attach_first_page(&state.browser.endpoint).await?;
186                        let v = client
187                            .send_with_session(
188                                "Runtime.evaluate",
189                                json!({
190                                    "expression": expr,
191                                    "returnByValue": true,
192                                    "awaitPromise": false,
193                                }),
194                                Some(&session_id),
195                            )
196                            .await?;
197                        client.close().await;
198                        v["result"]["value"].as_str().unwrap_or("").to_string()
199                    }
200                    Engine::Bidi => {
201                        let (client, ctx) = bidi_top_context(&state).await?;
202                        let v = client.script_evaluate(&ctx, &expr).await?;
203                        v["result"]["value"].as_str().unwrap_or("").to_string()
204                    }
205                };
206                Ok(text_content(html))
207            })
208        }),
209    }
210}
211
212// ---------------------------------------------------------------------------
213// screenshot
214// ---------------------------------------------------------------------------
215
216fn make_screenshot() -> RegisteredTool {
217    RegisteredTool {
218        name: "screenshot".into(),
219        description: "Capture a PNG screenshot of the active page.".into(),
220        input_schema: json!({
221            "type": "object",
222            "properties": {
223                "full_page": { "type": "boolean", "default": false },
224                "selector": { "type": "string" }
225            },
226        }),
227        handler: handler(|state, args| {
228            Box::pin(async move {
229                let full_page = args
230                    .get("full_page")
231                    .and_then(|v| v.as_bool())
232                    .unwrap_or(false);
233                let b64 = match state.browser.engine {
234                    Engine::Cdp => {
235                        let (client, session_id) =
236                            cdp_attach_first_page(&state.browser.endpoint).await?;
237                        let v = client
238                            .send_with_session(
239                                "Page.captureScreenshot",
240                                json!({
241                                    "format": "png",
242                                    "captureBeyondViewport": full_page,
243                                }),
244                                Some(&session_id),
245                            )
246                            .await?;
247                        client.close().await;
248                        v["data"]
249                            .as_str()
250                            .ok_or_else(|| anyhow!("no screenshot data"))?
251                            .to_string()
252                    }
253                    Engine::Bidi => {
254                        let (client, ctx) = bidi_top_context(&state).await?;
255                        let data = client.browsing_context_capture_screenshot(&ctx).await?;
256                        data
257                    }
258                };
259                Ok(image_content(b64))
260            })
261        }),
262    }
263}
264
265// ---------------------------------------------------------------------------
266// fetch
267// ---------------------------------------------------------------------------
268
269fn make_fetch() -> RegisteredTool {
270    RegisteredTool {
271        name: "fetch".into(),
272        description:
273            "Perform an HTTP request from the page context (preserves cookies, bypasses CORS)."
274                .into(),
275        input_schema: json!({
276            "type": "object",
277            "properties": {
278                "url": { "type": "string" },
279                "method": { "type": "string" },
280                "headers": { "type": "object" },
281                "body": { "type": "string" }
282            },
283            "required": ["url"],
284        }),
285        handler: handler(|state, args| {
286            Box::pin(async move {
287                if args.get("url").and_then(|v| v.as_str()).is_none() {
288                    return Err(anyhow!("missing 'url'"));
289                }
290                let args_json = serde_json::to_string(&args)?;
291                let args_literal = serde_json::to_string(&args_json)?;
292                let expr = format!("({FETCH_JS})({args_literal})");
293                let raw = match state.browser.engine {
294                    Engine::Cdp => {
295                        let (client, session_id) =
296                            cdp_attach_first_page(&state.browser.endpoint).await?;
297                        let v = client
298                            .send_with_session(
299                                "Runtime.evaluate",
300                                json!({
301                                    "expression": expr,
302                                    "returnByValue": true,
303                                    "awaitPromise": true,
304                                }),
305                                Some(&session_id),
306                            )
307                            .await?;
308                        client.close().await;
309                        v["result"]["value"].as_str().unwrap_or("").to_string()
310                    }
311                    Engine::Bidi => {
312                        let (client, ctx) = bidi_top_context(&state).await?;
313                        let v = client.script_evaluate(&ctx, &expr).await?;
314                        v["result"]["value"].as_str().unwrap_or("").to_string()
315                    }
316                };
317                let parsed: Value = serde_json::from_str(&raw)
318                    .map_err(|e| anyhow!("invalid fetch response JSON: {e}"))?;
319                let pretty = serde_json::to_string_pretty(&parsed)?;
320                Ok(text_content(pretty))
321            })
322        }),
323    }
324}
325
326// ---------------------------------------------------------------------------
327// select_element
328// ---------------------------------------------------------------------------
329
330fn make_select_element() -> RegisteredTool {
331    RegisteredTool {
332        name: "select_element".into(),
333        description:
334            "Show an interactive overlay; resolve with the CSS selector for the clicked element."
335                .into(),
336        input_schema: json!({
337            "type": "object",
338            "properties": {},
339        }),
340        handler: handler(|state, _args| {
341            Box::pin(async move {
342                let expr = SELECT_ELEMENT_JS.to_string();
343                let selector = match state.browser.engine {
344                    Engine::Cdp => {
345                        let (client, session_id) =
346                            cdp_attach_first_page(&state.browser.endpoint).await?;
347                        let v = client
348                            .send_with_session(
349                                "Runtime.evaluate",
350                                json!({
351                                    "expression": expr,
352                                    "returnByValue": true,
353                                    "awaitPromise": true,
354                                }),
355                                Some(&session_id),
356                            )
357                            .await?;
358                        client.close().await;
359                        v["result"]["value"].as_str().unwrap_or("").to_string()
360                    }
361                    Engine::Bidi => {
362                        let (client, ctx) = bidi_top_context(&state).await?;
363                        let v = client.script_evaluate(&ctx, &expr).await?;
364                        v["result"]["value"].as_str().unwrap_or("").to_string()
365                    }
366                };
367                Ok(text_content(selector))
368            })
369        }),
370    }
371}
372
#[cfg(test)]
mod tests {
    use super::*;

    /// `register_all` must register exactly the five documented tools.
    #[test]
    fn register_all_adds_five_tools() {
        let registry = ToolRegistry::new();
        register_all(&registry);

        let tools = registry.list();
        assert_eq!(tools.len(), 5);

        let names: Vec<&str> = tools
            .iter()
            .map(|tool| tool["name"].as_str().unwrap())
            .collect();
        for expected in [
            "navigate",
            "get_dom",
            "screenshot",
            "fetch",
            "select_element",
        ] {
            assert!(
                names.contains(&expected),
                "missing tool {expected} in {names:?}"
            );
        }
    }

    /// Every listed tool must expose an `inputSchema` object whose `type`
    /// is `"object"`.
    #[test]
    fn every_tool_has_object_input_schema() {
        let registry = ToolRegistry::new();
        register_all(&registry);

        for tool in registry.list() {
            let schema = &tool["inputSchema"];
            assert!(schema.is_object(), "schema not object: {schema}");
            assert_eq!(
                schema["type"], "object",
                "schema type != object for {}: {schema}",
                tool["name"]
            );
        }
    }
}