Skip to main content

agentkernel_sdk/
browser.rs

1//! Browser session for orchestrating headless browsers in sandboxes.
2//!
3//! Each method generates a self-contained Python/Playwright script,
4//! runs it inside the sandbox, and parses the JSON result.
5
6use base64::Engine;
7
8use crate::client::AgentKernel;
9use crate::error::{Error, Result};
10use crate::types::{AriaSnapshot, PageResult};
11
12// ---------------------------------------------------------------------------
13// Inline Playwright script templates
14// ---------------------------------------------------------------------------
15
/// Python/Playwright script executed by [`BrowserSession::goto`].
///
/// Invoked as `python3 -c <script> <url>`. It launches Chromium, navigates to
/// `argv[1]` with a 30 000 ms timeout and prints a single JSON object with
/// the keys `title`, `url` (the page URL after navigation), `text` (the first
/// 8000 characters of `document.body.innerText`) and `links`.
///
/// Only the first 50 `a[href]` elements are inspected; of those, only the
/// ones whose resolved `href` starts with `http` are reported, so fewer than
/// 50 links may be returned even on link-heavy pages.
const GOTO_SCRIPT: &str = r#"
import asyncio, json, sys
from playwright.async_api import async_playwright
async def main():
    url = sys.argv[1]
    async with async_playwright() as p:
        b = await p.chromium.launch()
        page = await b.new_page()
        await page.goto(url, timeout=30000)
        title = await page.title()
        url_final = page.url
        text = await page.evaluate("() => document.body.innerText.slice(0, 8000)")
        links = await page.evaluate('''() =>
            Array.from(document.querySelectorAll('a[href]'))
                .slice(0, 50)
                .map(a => ({text: a.textContent.trim(), href: a.href}))
                .filter(l => l.href.startsWith("http"))
        ''')
        print(json.dumps({"title": title, "url": url_final, "text": text, "links": links}))
        await b.close()
asyncio.run(main())
"#;
38
/// Python/Playwright script executed by [`BrowserSession::screenshot`].
///
/// Invoked as `python3 -c <script> <url>`. It launches Chromium, navigates to
/// `argv[1]` with a 30 000 ms timeout, takes a screenshot of the page and
/// prints the image bytes as a single base64 string on stdout. The Rust side
/// trims and decodes that string.
const SCREENSHOT_SCRIPT: &str = r#"
import asyncio, base64, sys
from playwright.async_api import async_playwright
async def main():
    url = sys.argv[1]
    async with async_playwright() as p:
        b = await p.chromium.launch()
        page = await b.new_page()
        await page.goto(url, timeout=30000)
        data = await page.screenshot()
        print(base64.b64encode(data).decode())
        await b.close()
asyncio.run(main())
"#;
53
/// Python/Playwright script executed by [`BrowserSession::evaluate`].
///
/// Invoked as `python3 -c <script> <url> <expression>`. It launches Chromium,
/// navigates to `argv[1]` with a 30 000 ms timeout, passes `argv[2]` to
/// Playwright's `page.evaluate` and prints the result serialized with
/// `json.dumps`.
const EVALUATE_SCRIPT: &str = r#"
import asyncio, json, sys
from playwright.async_api import async_playwright
async def main():
    url = sys.argv[1]
    expr = sys.argv[2]
    async with async_playwright() as p:
        b = await p.chromium.launch()
        page = await b.new_page()
        await page.goto(url, timeout=30000)
        result = await page.evaluate(expr)
        print(json.dumps(result))
        await b.close()
asyncio.run(main())
"#;
69
/// Command to install Playwright + Chromium inside a sandbox.
///
/// This is an argv vector (`sh -c "<pipeline>"`): the shell first installs
/// the `playwright` Python package quietly via `pip`, and only if that
/// succeeds runs `playwright install --with-deps chromium`.
pub const BROWSER_SETUP_CMD: &[&str] = &[
    "sh",
    "-c",
    "pip install -q playwright && playwright install --with-deps chromium",
];
76
77// -- v2 scripts: proxy through in-sandbox browser server on port 9222 --
78
/// Python script that probes the in-sandbox browser server's `/health` route.
///
/// Invoked as `python3 -c <script> [port]`; the port defaults to `9222` when
/// omitted. On success the raw response body is printed. On any exception a
/// JSON object `{"status": "down", "error": ...}` is printed and the script
/// exits with status 1.
const BROWSER_HEALTH_SCRIPT: &str = r#"
import json, urllib.request, sys
port = sys.argv[1] if len(sys.argv) > 1 else "9222"
try:
    req = urllib.request.urlopen(f"http://127.0.0.1:{port}/health", timeout=5)
    print(req.read().decode())
except Exception as e:
    print(json.dumps({"status": "down", "error": str(e)}))
    sys.exit(1)
"#;
89
/// Python script that proxies one HTTP request to the in-sandbox browser
/// server on `127.0.0.1`.
///
/// Invoked as `python3 -c <script> [port] [method] [path] [body]`, with
/// defaults `9222`, `GET`, `/health` and no body. When a non-empty body is
/// given it is sent with `Content-Type: application/json`.
///
/// The response body is printed on success. On an HTTP error status the error
/// response body is printed and the script exits with status 1; on any other
/// failure a JSON object `{"error": ...}` is printed and the script exits
/// with status 1.
///
/// `urllib.error` is imported explicitly because the script names
/// `urllib.error.HTTPError` in an `except` clause.
const BROWSER_REQUEST_SCRIPT: &str = r#"
import json, urllib.error, urllib.request, sys
port = sys.argv[1] if len(sys.argv) > 1 else "9222"
method = sys.argv[2] if len(sys.argv) > 2 else "GET"
path = sys.argv[3] if len(sys.argv) > 3 else "/health"
body_str = sys.argv[4] if len(sys.argv) > 4 else None
url = f"http://127.0.0.1:{port}{path}"
data = body_str.encode() if body_str else None
req = urllib.request.Request(url, data=data, method=method)
if data:
    req.add_header("Content-Type", "application/json")
try:
    resp = urllib.request.urlopen(req, timeout=60)
    print(resp.read().decode())
except urllib.error.HTTPError as e:
    print(e.read().decode())
    sys.exit(1)
except Exception as e:
    print(json.dumps({"error": str(e)}))
    sys.exit(1)
"#;
111
112/// A sandboxed headless browser controlled from outside.
113///
114/// The browser (Chromium via Playwright) runs inside an agentkernel sandbox.
115/// You call high-level methods; the SDK generates and executes scripts internally.
116///
117/// # Example
118///
119/// ```no_run
120/// # async fn example() -> agentkernel_sdk::Result<()> {
121/// let client = agentkernel_sdk::AgentKernel::builder().build()?;
122/// let mut browser = client.browser("my-browser", None).await?;
123/// let page = browser.goto("https://example.com").await?;
124/// println!("{} — {} links", page.title, page.links.len());
125/// let png = browser.screenshot(None).await?;
126/// browser.remove().await?;
127/// # Ok(())
128/// # }
129/// ```
130pub struct BrowserSession {
131    /// The sandbox name.
132    name: String,
133    /// The underlying client.
134    client: AgentKernel,
135    /// Whether `remove()` has already been called.
136    removed: bool,
137    /// Last URL visited via `goto()`.
138    last_url: Option<String>,
139    /// Whether the v2 browser server has been confirmed running.
140    server_started: bool,
141}
142
143impl BrowserSession {
144    /// Create a new `BrowserSession`.
145    ///
146    /// Prefer [`AgentKernel::browser`] which creates the sandbox and installs
147    /// Playwright for you.
148    pub(crate) fn new(name: String, client: AgentKernel) -> Self {
149        Self {
150            name,
151            client,
152            removed: false,
153            last_url: None,
154            server_started: false,
155        }
156    }
157
158    /// The sandbox name backing this browser session.
159    pub fn name(&self) -> &str {
160        &self.name
161    }
162
163    /// Navigate to a URL and return page data (title, text, links).
164    pub async fn goto(&mut self, url: &str) -> Result<PageResult> {
165        let output = self
166            .client
167            .exec_in_sandbox(&self.name, &["python3", "-c", GOTO_SCRIPT, url], None)
168            .await?;
169        self.last_url = Some(url.to_string());
170        let result: PageResult = serde_json::from_str(&output.output)?;
171        Ok(result)
172    }
173
174    /// Take a PNG screenshot. Returns the raw PNG bytes.
175    ///
176    /// If `url` is `None`, re-uses the last URL from [`goto`](Self::goto).
177    pub async fn screenshot(&self, url: Option<&str>) -> Result<Vec<u8>> {
178        let target = url
179            .map(String::from)
180            .or_else(|| self.last_url.clone())
181            .ok_or_else(|| {
182                Error::Validation("No URL specified and no previous goto() call".to_string())
183            })?;
184        let output = self
185            .client
186            .exec_in_sandbox(
187                &self.name,
188                &["python3", "-c", SCREENSHOT_SCRIPT, &target],
189                None,
190            )
191            .await?;
192        let bytes = base64::engine::general_purpose::STANDARD
193            .decode(output.output.trim())
194            .map_err(|e| Error::Server(format!("base64 decode failed: {e}")))?;
195        Ok(bytes)
196    }
197
198    /// Run a JavaScript expression on a page and return the result as JSON.
199    ///
200    /// If `url` is `None`, re-uses the last URL from [`goto`](Self::goto).
201    pub async fn evaluate(&self, expression: &str, url: Option<&str>) -> Result<serde_json::Value> {
202        let target = url
203            .map(String::from)
204            .or_else(|| self.last_url.clone())
205            .ok_or_else(|| {
206                Error::Validation("No URL specified and no previous goto() call".to_string())
207            })?;
208        let output = self
209            .client
210            .exec_in_sandbox(
211                &self.name,
212                &["python3", "-c", EVALUATE_SCRIPT, &target, expression],
213                None,
214            )
215            .await?;
216        let value: serde_json::Value = serde_json::from_str(&output.output)?;
217        Ok(value)
218    }
219
220    /// Remove the underlying sandbox. Idempotent.
221    pub async fn remove(&mut self) -> Result<()> {
222        if self.removed {
223            return Ok(());
224        }
225        self.removed = true;
226        self.client.remove_sandbox(&self.name).await
227    }
228
229    // -- v2 methods (ARIA snapshots, persistent pages, ref-based interaction) --
230
231    /// Check that the in-sandbox browser server is running.
232    async fn ensure_server(&mut self) -> Result<()> {
233        if self.server_started {
234            return Ok(());
235        }
236        let output = self
237            .client
238            .exec_in_sandbox(
239                &self.name,
240                &["python3", "-c", BROWSER_HEALTH_SCRIPT, "9222"],
241                None,
242            )
243            .await;
244        if let Ok(out) = output {
245            if out.output.contains("\"status\":\"ok\"") || out.output.contains("\"status\": \"ok\"")
246            {
247                self.server_started = true;
248                return Ok(());
249            }
250        }
251        Err(Error::Server(
252            "Browser server not running — use browser_create or MCP browser_open to start it"
253                .to_string(),
254        ))
255    }
256
257    /// Send a request to the in-sandbox browser server.
258    async fn browser_request(
259        &self,
260        method: &str,
261        path: &str,
262        body: Option<&str>,
263    ) -> Result<String> {
264        let mut cmd = vec![
265            "python3",
266            "-c",
267            BROWSER_REQUEST_SCRIPT,
268            "9222",
269            method,
270            path,
271        ];
272        if let Some(b) = body {
273            cmd.push(b);
274        }
275        let output = self.client.exec_in_sandbox(&self.name, &cmd, None).await?;
276        Ok(output.output)
277    }
278
279    /// Navigate to a URL and return an ARIA snapshot.
280    pub async fn open(&mut self, url: &str, page: Option<&str>) -> Result<AriaSnapshot> {
281        let page = page.unwrap_or("default");
282        self.ensure_server().await?;
283        let body = serde_json::json!({ "url": url }).to_string();
284        let path = format!("/pages/{page}/goto");
285        let output = self.browser_request("POST", &path, Some(&body)).await?;
286        let result: AriaSnapshot = serde_json::from_str(&output)?;
287        Ok(result)
288    }
289
290    /// Get the current ARIA snapshot without navigating.
291    pub async fn snapshot(&mut self, page: Option<&str>) -> Result<AriaSnapshot> {
292        let page = page.unwrap_or("default");
293        self.ensure_server().await?;
294        let path = format!("/pages/{page}/snapshot");
295        let output = self.browser_request("GET", &path, None).await?;
296        let result: AriaSnapshot = serde_json::from_str(&output)?;
297        Ok(result)
298    }
299
300    /// Click an element by ref ID or CSS selector. Returns a new ARIA snapshot.
301    pub async fn click(
302        &mut self,
303        page: Option<&str>,
304        ref_id: Option<&str>,
305        selector: Option<&str>,
306    ) -> Result<AriaSnapshot> {
307        let page = page.unwrap_or("default");
308        self.ensure_server().await?;
309        let mut body = serde_json::Map::new();
310        if let Some(r) = ref_id {
311            body.insert("ref".to_string(), serde_json::Value::String(r.to_string()));
312        }
313        if let Some(s) = selector {
314            body.insert(
315                "selector".to_string(),
316                serde_json::Value::String(s.to_string()),
317            );
318        }
319        let body_str = serde_json::Value::Object(body).to_string();
320        let path = format!("/pages/{page}/click");
321        let output = self.browser_request("POST", &path, Some(&body_str)).await?;
322        let result: AriaSnapshot = serde_json::from_str(&output)?;
323        Ok(result)
324    }
325
326    /// Fill an input by ref ID or CSS selector. Returns a new ARIA snapshot.
327    pub async fn fill(
328        &mut self,
329        value: &str,
330        page: Option<&str>,
331        ref_id: Option<&str>,
332        selector: Option<&str>,
333    ) -> Result<AriaSnapshot> {
334        let page = page.unwrap_or("default");
335        self.ensure_server().await?;
336        let mut body = serde_json::Map::new();
337        body.insert(
338            "value".to_string(),
339            serde_json::Value::String(value.to_string()),
340        );
341        if let Some(r) = ref_id {
342            body.insert("ref".to_string(), serde_json::Value::String(r.to_string()));
343        }
344        if let Some(s) = selector {
345            body.insert(
346                "selector".to_string(),
347                serde_json::Value::String(s.to_string()),
348            );
349        }
350        let body_str = serde_json::Value::Object(body).to_string();
351        let path = format!("/pages/{page}/fill");
352        let output = self.browser_request("POST", &path, Some(&body_str)).await?;
353        let result: AriaSnapshot = serde_json::from_str(&output)?;
354        Ok(result)
355    }
356
357    /// Close a named page.
358    pub async fn close_page(&mut self, page: Option<&str>) -> Result<()> {
359        let page = page.unwrap_or("default");
360        self.ensure_server().await?;
361        let path = format!("/pages/{page}");
362        self.browser_request("DELETE", &path, None).await?;
363        Ok(())
364    }
365
366    /// List active page names.
367    pub async fn list_pages(&mut self) -> Result<Vec<String>> {
368        self.ensure_server().await?;
369        let output = self.browser_request("GET", "/pages", None).await?;
370        let value: serde_json::Value = serde_json::from_str(&output)?;
371        let pages = value["pages"]
372            .as_array()
373            .map(|arr| {
374                arr.iter()
375                    .filter_map(|v| v.as_str().map(String::from))
376                    .collect()
377            })
378            .unwrap_or_default();
379        Ok(pages)
380    }
381}